001 /*
002 * Copyright 2007-2013 UnboundID Corp.
003 * All Rights Reserved.
004 */
005 /*
006 * Copyright (C) 2008-2013 UnboundID Corp.
007 *
008 * This program is free software; you can redistribute it and/or modify
009 * it under the terms of the GNU General Public License (GPLv2 only)
010 * or the terms of the GNU Lesser General Public License (LGPLv2.1 only)
011 * as published by the Free Software Foundation.
012 *
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
016 * GNU General Public License for more details.
017 *
018 * You should have received a copy of the GNU General Public License
019 * along with this program; if not, see <http://www.gnu.org/licenses>.
020 */
021 package com.unboundid.ldif;
022
023
024
025 import java.io.BufferedReader;
026 import java.io.BufferedWriter;
027 import java.io.File;
028 import java.io.FileInputStream;
029 import java.io.FileWriter;
030 import java.io.InputStream;
031 import java.io.InputStreamReader;
032 import java.io.IOException;
033 import java.text.ParseException;
034 import java.util.ArrayList;
035 import java.util.Collection;
036 import java.util.Iterator;
037 import java.util.LinkedHashMap;
038 import java.util.List;
039 import java.util.concurrent.BlockingQueue;
040 import java.util.concurrent.ArrayBlockingQueue;
041 import java.util.concurrent.TimeUnit;
042 import java.util.concurrent.atomic.AtomicBoolean;
043 import java.nio.charset.Charset;
044
045 import com.unboundid.asn1.ASN1OctetString;
046 import com.unboundid.ldap.matchingrules.CaseIgnoreStringMatchingRule;
047 import com.unboundid.ldap.matchingrules.MatchingRule;
048 import com.unboundid.ldap.sdk.Attribute;
049 import com.unboundid.ldap.sdk.Entry;
050 import com.unboundid.ldap.sdk.Modification;
051 import com.unboundid.ldap.sdk.ModificationType;
052 import com.unboundid.ldap.sdk.LDAPException;
053 import com.unboundid.ldap.sdk.schema.Schema;
054 import com.unboundid.util.AggregateInputStream;
055 import com.unboundid.util.Base64;
056 import com.unboundid.util.LDAPSDKThreadFactory;
057 import com.unboundid.util.ThreadSafety;
058 import com.unboundid.util.ThreadSafetyLevel;
059 import com.unboundid.util.parallel.AsynchronousParallelProcessor;
060 import com.unboundid.util.parallel.Result;
061 import com.unboundid.util.parallel.ParallelProcessor;
062 import com.unboundid.util.parallel.Processor;
063
064 import static com.unboundid.ldif.LDIFMessages.*;
065 import static com.unboundid.util.Debug.*;
066 import static com.unboundid.util.StaticUtils.*;
067 import static com.unboundid.util.Validator.*;
068
069 /**
070 * This class provides an LDIF reader, which can be used to read and decode
071 * entries and change records from a data source using the LDAP Data Interchange
072 * Format as per <A HREF="http://www.ietf.org/rfc/rfc2849.txt">RFC 2849</A>.
073 * <BR>
074 * This class is not synchronized. If multiple threads read from the
075 * LDIFReader, they must be synchronized externally.
076 * <BR><BR>
077 * <H2>Example</H2>
078 * The following example iterates through all entries contained in an LDIF file
079 * and attempts to add them to a directory server:
080 * <PRE>
081 * LDIFReader ldifReader = new LDIFReader(pathToLDIFFile);
082 *
083 * while (true)
084 * {
085 * Entry entry;
086 * try
087 * {
088 * entry = ldifReader.readEntry();
089 * if (entry == null)
090 * {
091 * System.err.println("All entries have been processed.");
092 * break;
093 * }
094 * }
095 * catch (LDIFException le)
096 * {
097 * if (le.mayContinueReading())
098 * {
099 * System.err.println("A recoverable error occurred while attempting to " +
100 * "read an entry at or near line number " + le.getLineNumber() +
101 * ": " + le.getMessage());
102 * System.err.println("The entry will be skipped.");
103 * continue;
104 * }
105 * else
106 * {
107 * System.err.println("An unrecoverable error occurred while attempting to " +
108 * "read an entry at or near line number " + le.getLineNumber() +
109 * ": " + le.getMessage());
110 * System.err.println("LDIF processing will be aborted.");
111 * break;
112 * }
113 * }
114 * catch (IOException ioe)
115 * {
116 * System.err.println("An I/O error occurred while attempting to read " +
117 * "from the LDIF file: " + ioe.getMessage());
118 * System.err.println("LDIF processing will be aborted.");
119 * break;
120 * }
121 *
122 * try
123 * {
124 * connection.add(entry);
125 * System.out.println("Successfully added entry " + entry.getDN());
126 * }
127 * catch (LDAPException le)
128 * {
129 * System.err.println("Unable to add entry " + entry.getDN() + " -- " +
130 * le.getMessage());
131 * }
132 * }
133 *
134 * ldifReader.close();
135 * </PRE>
136 */
137 @ThreadSafety(level=ThreadSafetyLevel.NOT_THREADSAFE)
138 public final class LDIFReader
139 {
140 /**
141 * The default buffer size (128KB) that will be used when reading from the
142 * data source.
143 */
144 public static final int DEFAULT_BUFFER_SIZE = 128 * 1024;
145
146
147
148 /*
149 * When processing asynchronously, this determines how many of the allocated
150 * worker threads are used to parse each batch of read entries.
151 */
152 private static final int ASYNC_MIN_PER_PARSING_THREAD = 3;
153
154
155
156 /**
157 * When processing asynchronously, this specifies the size of the pending and
158 * completed queues.
159 */
160 private static final int ASYNC_QUEUE_SIZE = 500;
161
162
163
164 /**
165 * Special entry used internally to signal that the LDIFReaderEntryTranslator
166 * has signalled that a read Entry should be skipped by returning null,
167 * which normally implies EOF.
168 */
169 private static final Entry SKIP_ENTRY = new Entry("cn=skipped");
170
171
172
/**
 * The default base path that will be prepended to relative paths.  It is the
 * absolute form of the directory named by the {@code user.dir} system
 * property (or of "." if that property is not set), and it always ends with
 * the platform file separator.
 */
private static final String DEFAULT_RELATIVE_BASE_PATH;
static
{
  final String userDir = System.getProperty("user.dir");
  final File workingDir = new File((userDir == null) ? "." : userDir);

  // Guarantee exactly one trailing separator so that relative paths can simply
  // be appended.
  final String absolutePath = workingDir.getAbsolutePath();
  DEFAULT_RELATIVE_BASE_PATH = absolutePath.endsWith(File.separator)
       ? absolutePath
       : (absolutePath + File.separator);
}
201
202
203
204 // The buffered reader that will be used to read LDIF data.
205 private final BufferedReader reader;
206
207 // The behavior that should be exhibited when encountering duplicate attribute
208 // values.
209 private volatile DuplicateValueBehavior duplicateValueBehavior;
210
211 // A line number counter.
212 private long lineNumberCounter = 0;
213
214 private final LDIFReaderEntryTranslator entryTranslator;
215
216 // The schema that will be used when processing, if applicable.
217 private Schema schema;
218
219 // Specifies the base path that will be prepended to relative paths for file
220 // URLs.
221 private volatile String relativeBasePath;
222
223 // The behavior that should be exhibited with regard to illegal trailing
224 // spaces in attribute values.
225 private volatile TrailingSpaceBehavior trailingSpaceBehavior;
226
227 // True iff we are processing asynchronously.
228 private final boolean isAsync;
229
230 //
231 // The following only apply to asynchronous processing.
232 //
233
234 // Parses entries asynchronously.
235 private final AsynchronousParallelProcessor<UnparsedLDIFRecord, LDIFRecord>
236 asyncParser;
237
238 // Set to true when the end of the input is reached.
239 private final AtomicBoolean asyncParsingComplete;
240
241 // The records that have been read and parsed.
242 private final BlockingQueue<Result<UnparsedLDIFRecord, LDIFRecord>>
243 asyncParsedRecords;
244
245
246
/**
 * Creates a new LDIF reader that reads from the file with the given path and
 * parses records synchronously.
 *
 * @param  path  The path to the file from which the data is to be read.  It
 *               must not be {@code null}.
 *
 * @throws  IOException  If the file cannot be opened for reading.
 */
public LDIFReader(final String path)
       throws IOException
{
  this(new FileInputStream(path));
}
261
262
263
/**
 * Creates a new LDIF reader that reads from the file with the given path,
 * optionally parsing records asynchronously with the given number of
 * threads.
 *
 * @param  path             The path to the file from which the data is to be
 *                          read.  It must not be {@code null}.
 * @param  numParseThreads  The number of threads to use to asynchronously
 *                          read and parse the LDIF file.  A value of zero
 *                          causes records to be parsed synchronously.
 *
 * @throws  IOException  If the file cannot be opened for reading.
 *
 * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
 *      constructor for more details about asynchronous processing.
 */
public LDIFReader(final String path, final int numParseThreads)
       throws IOException
{
  this(new FileInputStream(path), numParseThreads);
}
286
287
288
/**
 * Creates a new LDIF reader that reads from the given file and parses
 * records synchronously.
 *
 * @param  file  The file from which the data is to be read.  It must not be
 *               {@code null}.
 *
 * @throws  IOException  If the file cannot be opened for reading.
 */
public LDIFReader(final File file)
       throws IOException
{
  this(new FileInputStream(file));
}
303
304
305
/**
 * Creates a new LDIF reader that reads from the given file, optionally
 * parsing records asynchronously with the given number of threads.
 *
 * @param  file             The file from which the data is to be read.  It
 *                          must not be {@code null}.
 * @param  numParseThreads  The number of threads to use to asynchronously
 *                          read and parse the LDIF file.  A value of zero
 *                          causes records to be parsed synchronously.
 *
 * @throws  IOException  If the file cannot be opened for reading.
 */
public LDIFReader(final File file, final int numParseThreads)
       throws IOException
{
  this(new FileInputStream(file), numParseThreads);
}
325
326
327
/**
 * Creates a new LDIF reader that reads from the given files, in the order
 * provided, as though they were a single LDIF source.  Records may
 * optionally be parsed asynchronously with the given number of threads.
 *
 * @param  files            The files from which the data is to be read.  It
 *                          must not be {@code null} or empty.
 * @param  numParseThreads  The number of threads to use to asynchronously
 *                          read and parse the LDIF data.  A value of zero
 *                          causes records to be parsed synchronously.
 * @param  entryTranslator  The translator to apply to entries before they
 *                          are returned.  It is normally {@code null}, in
 *                          which case entries are returned unaltered.  A
 *                          translator is particularly useful when parsing
 *                          in parallel, because the translation is then
 *                          also performed in parallel.
 *
 * @throws  IOException  If no files were provided, or if a problem occurs
 *                       while preparing the files for reading.
 */
public LDIFReader(final File[] files, final int numParseThreads,
                  final LDIFReaderEntryTranslator entryTranslator)
       throws IOException
{
  this(createAggregateInputStream(files), numParseThreads, entryTranslator);
}
354
355
356
357 /**
358 * Creates a new aggregate input stream that will read data from the specified
359 * files. If there are multiple files, then a "padding" file will be inserted
360 * between them to ensure that there is at least one blank line between the
361 * end of one file and the beginning of another.
362 *
363 * @param files The files from which the data is to be read. It must not be
364 * {@code null} or empty.
365 *
366 * @return The input stream to use to read data from the provided files.
367 *
368 * @throws IOException If a problem is encountered while attempting to
369 * create the input stream.
370 */
371 private static InputStream createAggregateInputStream(final File... files)
372 throws IOException
373 {
374 if (files.length == 0)
375 {
376 throw new IOException(ERR_READ_NO_LDIF_FILES.get());
377 }
378 else if (files.length == 1)
379 {
380 return new FileInputStream(files[0]);
381 }
382 else
383 {
384 final File spacerFile =
385 File.createTempFile("ldif-reader-spacer", ".ldif");
386 spacerFile.deleteOnExit();
387
388 final BufferedWriter spacerWriter =
389 new BufferedWriter(new FileWriter(spacerFile));
390 try
391 {
392 spacerWriter.newLine();
393 spacerWriter.newLine();
394 }
395 finally
396 {
397 spacerWriter.close();
398 }
399
400 final File[] returnArray = new File[(files.length * 2) - 1];
401 returnArray[0] = files[0];
402
403 int pos = 1;
404 for (int i=1; i < files.length; i++)
405 {
406 returnArray[pos++] = spacerFile;
407 returnArray[pos++] = files[i];
408 }
409
410 return new AggregateInputStream(returnArray);
411 }
412 }
413
414
415
/**
 * Creates a new LDIF reader that reads from the given input stream and
 * parses records synchronously.
 *
 * @param  inputStream  The input stream from which the data is to be read.
 *                      It must not be {@code null}.
 */
public LDIFReader(final InputStream inputStream)
{
  this(inputStream, 0);
}
427
428
429
/**
 * Creates a new LDIF reader that reads UTF-8 data from the given input
 * stream, optionally parsing records asynchronously with the given number of
 * threads.
 *
 * @param  inputStream      The input stream from which the data is to be
 *                          read.  It must not be {@code null}.
 * @param  numParseThreads  The number of threads to use to asynchronously
 *                          read and parse the LDIF data.  A value of zero
 *                          causes records to be parsed synchronously.
 *
 * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
 *      constructor for more details about asynchronous processing.
 */
public LDIFReader(final InputStream inputStream, final int numParseThreads)
{
  // RFC 2849 requires UTF-8, and Java guarantees that charset is available.
  this(
       new BufferedReader(
            new InputStreamReader(inputStream, Charset.forName("UTF-8")),
            DEFAULT_BUFFER_SIZE),
       numParseThreads);
}
452
453
454
/**
 * Creates a new LDIF reader that reads UTF-8 data from the given input
 * stream, optionally parsing records asynchronously with the given number of
 * threads and applying the given translator to each entry.
 *
 * @param  inputStream      The input stream from which the data is to be
 *                          read.  It must not be {@code null}.
 * @param  numParseThreads  The number of threads to use to asynchronously
 *                          read and parse the LDIF data.  A value of zero
 *                          causes records to be parsed synchronously.
 * @param  entryTranslator  The translator to apply to entries before they
 *                          are returned.  It is normally {@code null}, in
 *                          which case entries are returned unaltered.  A
 *                          translator is particularly useful when parsing
 *                          in parallel, because the translation is then
 *                          also performed in parallel.
 *
 * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
 *      constructor for more details about asynchronous processing.
 */
public LDIFReader(final InputStream inputStream, final int numParseThreads,
                  final LDIFReaderEntryTranslator entryTranslator)
{
  // RFC 2849 requires UTF-8, and Java guarantees that charset is available.
  this(
       new BufferedReader(
            new InputStreamReader(inputStream, Charset.forName("UTF-8")),
            DEFAULT_BUFFER_SIZE),
       numParseThreads,
       entryTranslator);
}
484
485
486
/**
 * Creates a new LDIF reader that reads from the given buffered reader and
 * parses records synchronously.  The underlying reader must decode its input
 * as "UTF-8", as required by RFC 2849.
 *
 * @param  reader  The buffered reader that will be used to read the LDIF
 *                 data.  It must not be {@code null}.
 */
public LDIFReader(final BufferedReader reader)
{
  this(reader, 0);
}
499
500
501
/**
 * Creates a new LDIF reader that reads from the given buffered reader,
 * optionally parsing records asynchronously with the given number of
 * threads.  No entry translator is used.  The underlying reader must decode
 * its input as "UTF-8", as required by RFC 2849.
 *
 * @param  reader           The buffered reader that will be used to read the
 *                          LDIF data.  It must not be {@code null}.
 * @param  numParseThreads  The number of threads to use to asynchronously
 *                          read and parse the LDIF data.  A value of zero
 *                          causes records to be parsed synchronously.
 *
 * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
 *      constructor for more details about asynchronous processing.
 */
public LDIFReader(final BufferedReader reader, final int numParseThreads)
{
  this(reader, numParseThreads, null);
}
521
522
523
/**
 * Creates a new LDIF reader that reads from the given buffered reader,
 * optionally parsing records asynchronously with the given number of threads
 * and applying the given translator to each entry.  The underlying reader
 * must decode its input as "UTF-8", as required by RFC 2849.
 * <BR><BR>
 * The reader initially strips duplicate attribute values, rejects records
 * with illegal trailing spaces, and resolves relative paths against the
 * default base path.
 *
 * @param  reader           The buffered reader that will be used to read the
 *                          LDIF data.  It must not be {@code null}.
 * @param  numParseThreads  The number of threads to use to asynchronously
 *                          read and parse the LDIF data.  It must not be
 *                          negative.  A value of zero selects the default
 *                          behavior of reading and parsing records
 *                          synchronously when one of the read methods is
 *                          called.  A nonzero value should only be used
 *                          when performance analysis has shown that reading
 *                          and parsing the LDIF is a bottleneck, since
 *                          synchronous processing is normally fast enough.
 *                          There is little benefit in a value greater than
 *                          four unless an entry translator performs
 *                          time-consuming processing.
 * @param  entryTranslator  The translator to apply to entries before they
 *                          are returned.  It is normally {@code null}, in
 *                          which case entries are returned unaltered.  A
 *                          translator is particularly useful when parsing
 *                          in parallel, because the translation is then
 *                          also performed in parallel.
 */
public LDIFReader(final BufferedReader reader,
                  final int numParseThreads,
                  final LDIFReaderEntryTranslator entryTranslator)
{
  ensureNotNull(reader);
  ensureTrue(numParseThreads >= 0,
       "LDIFReader.numParseThreads must not be negative.");

  this.reader = reader;
  this.entryTranslator = entryTranslator;

  // Initial settings, all of which may be changed through the setters.
  relativeBasePath = DEFAULT_RELATIVE_BASE_PATH;
  trailingSpaceBehavior = TrailingSpaceBehavior.REJECT;
  duplicateValueBehavior = DuplicateValueBehavior.STRIP;

  if (numParseThreads != 0)
  {
    isAsync = true;
    asyncParsingComplete = new AtomicBoolean(false);

    // The queue of records that have been read but not yet parsed.
    final BlockingQueue<UnparsedLDIFRecord> unparsedRecords =
         new ArrayBlockingQueue<UnparsedLDIFRecord>(ASYNC_QUEUE_SIZE);

    // The output queue must be a little more than twice as big as the input
    // queue to more easily handle being shut down in the middle of processing
    // when the queues are full and threads are blocked.
    asyncParsedRecords =
         new ArrayBlockingQueue<Result<UnparsedLDIFRecord, LDIFRecord>>(
              (2 * ASYNC_QUEUE_SIZE) + 100);

    // The processor that decodes records in parallel.
    final LDAPSDKThreadFactory workerThreadFactory =
         new LDAPSDKThreadFactory("LDIFReader Worker", true, null);
    final ParallelProcessor<UnparsedLDIFRecord, LDIFRecord> recordDecoder =
         new ParallelProcessor<UnparsedLDIFRecord, LDIFRecord>(
              new RecordParser(), workerThreadFactory, numParseThreads,
              ASYNC_MIN_PER_PARSING_THREAD);

    asyncParser =
         new AsynchronousParallelProcessor<UnparsedLDIFRecord, LDIFRecord>(
              unparsedRecords, recordDecoder, asyncParsedRecords);

    // Begin reading from the source in the background.
    new LineReaderThread().start();
  }
  else
  {
    isAsync = false;
    asyncParser = null;
    asyncParsingComplete = null;
    asyncParsedRecords = null;
  }
}
606
607
608
609 /**
610 * Reads entries from the LDIF file with the specified path and returns them
611 * as a {@code List}. This is a convenience method that should only be used
612 * for data sets that are small enough so that running out of memory isn't a
613 * concern.
614 *
615 * @param path The path to the LDIF file containing the entries to be read.
616 *
617 * @return A list of the entries read from the given LDIF file.
618 *
619 * @throws IOException If a problem occurs while attempting to read data
620 * from the specified file.
621 *
622 * @throws LDIFException If a problem is encountered while attempting to
623 * decode data read as LDIF.
624 */
625 public static List<Entry> readEntries(final String path)
626 throws IOException, LDIFException
627 {
628 return readEntries(new LDIFReader(path));
629 }
630
631
632
633 /**
634 * Reads entries from the specified LDIF file and returns them as a
635 * {@code List}. This is a convenience method that should only be used for
636 * data sets that are small enough so that running out of memory isn't a
637 * concern.
638 *
639 * @param file A reference to the LDIF file containing the entries to be
640 * read.
641 *
642 * @return A list of the entries read from the given LDIF file.
643 *
644 * @throws IOException If a problem occurs while attempting to read data
645 * from the specified file.
646 *
647 * @throws LDIFException If a problem is encountered while attempting to
648 * decode data read as LDIF.
649 */
650 public static List<Entry> readEntries(final File file)
651 throws IOException, LDIFException
652 {
653 return readEntries(new LDIFReader(file));
654 }
655
656
657
658 /**
659 * Reads and decodes LDIF entries from the provided input stream and
660 * returns them as a {@code List}. This is a convenience method that should
661 * only be used for data sets that are small enough so that running out of
662 * memory isn't a concern.
663 *
664 * @param inputStream The input stream from which the entries should be
665 * read. The input stream will be closed before
666 * returning.
667 *
668 * @return A list of the entries read from the given input stream.
669 *
670 * @throws IOException If a problem occurs while attempting to read data
671 * from the input stream.
672 *
673 * @throws LDIFException If a problem is encountered while attempting to
674 * decode data read as LDIF.
675 */
676 public static List<Entry> readEntries(final InputStream inputStream)
677 throws IOException, LDIFException
678 {
679 return readEntries(new LDIFReader(inputStream));
680 }
681
682
683
684 /**
685 * Reads entries from the provided LDIF reader and returns them as a list.
686 *
687 * @param reader The reader from which the entries should be read. It will
688 * be closed before returning.
689 *
690 * @return A list of the entries read from the provided reader.
691 *
692 * @throws IOException If a problem was encountered while attempting to read
693 * data from the LDIF data source.
694 *
695 * @throws LDIFException If a problem is encountered while attempting to
696 * decode data read as LDIF.
697 */
698 private static List<Entry> readEntries(final LDIFReader reader)
699 throws IOException, LDIFException
700 {
701 try
702 {
703 final ArrayList<Entry> entries = new ArrayList<Entry>(10);
704 while (true)
705 {
706 final Entry e = reader.readEntry();
707 if (e == null)
708 {
709 break;
710 }
711
712 entries.add(e);
713 }
714
715 return entries;
716 }
717 finally
718 {
719 reader.close();
720 }
721 }
722
723
724
725 /**
726 * Closes this LDIF reader and the underlying LDIF source.
727 *
728 * @throws IOException If a problem occurs while closing the underlying LDIF
729 * source.
730 */
731 public void close()
732 throws IOException
733 {
734 reader.close();
735
736 if (isAsync())
737 {
738 // Closing the reader will trigger the LineReaderThread to complete, but
739 // not if it's blocked submitting the next UnparsedLDIFRecord. To avoid
740 // this, we clear out the completed output queue, which is larger than
741 // the input queue, so the LineReaderThread will stop reading and
742 // shutdown the asyncParser.
743 asyncParsedRecords.clear();
744 }
745 }
746
747
748
749 /**
750 * Indicates whether to ignore any duplicate values encountered while reading
751 * LDIF records.
752 *
753 * @return {@code true} if duplicate values should be ignored, or
754 * {@code false} if any LDIF records containing duplicate values
755 * should be rejected.
756 *
757 * @deprecated Use the {@link #getDuplicateValueBehavior} method instead.
758 */
759 @Deprecated()
760 public boolean ignoreDuplicateValues()
761 {
762 return (duplicateValueBehavior == DuplicateValueBehavior.STRIP);
763 }
764
765
766
767 /**
768 * Specifies whether to ignore any duplicate values encountered while reading
769 * LDIF records.
770 *
771 * @param ignoreDuplicateValues Indicates whether to ignore duplicate
772 * attribute values encountered while reading
773 * LDIF records.
774 *
775 * @deprecated Use the {@link #setDuplicateValueBehavior} method instead.
776 */
777 @Deprecated()
778 public void setIgnoreDuplicateValues(final boolean ignoreDuplicateValues)
779 {
780 if (ignoreDuplicateValues)
781 {
782 duplicateValueBehavior = DuplicateValueBehavior.STRIP;
783 }
784 else
785 {
786 duplicateValueBehavior = DuplicateValueBehavior.REJECT;
787 }
788 }
789
790
791
792 /**
793 * Retrieves the behavior that should be exhibited if the LDIF reader
794 * encounters an entry with duplicate values.
795 *
796 * @return The behavior that should be exhibited if the LDIF reader
797 * encounters an entry with duplicate values.
798 */
799 public DuplicateValueBehavior getDuplicateValueBehavior()
800 {
801 return duplicateValueBehavior;
802 }
803
804
805
806 /**
807 * Specifies the behavior that should be exhibited if the LDIF reader
808 * encounters an entry with duplicate values.
809 *
810 * @param duplicateValueBehavior The behavior that should be exhibited if
811 * the LDIF reader encounters an entry with
812 * duplicate values.
813 */
814 public void setDuplicateValueBehavior(
815 final DuplicateValueBehavior duplicateValueBehavior)
816 {
817 this.duplicateValueBehavior = duplicateValueBehavior;
818 }
819
820
821
822 /**
823 * Indicates whether to strip off any illegal trailing spaces that may appear
824 * in LDIF records (e.g., after an entry DN or attribute value). The LDIF
825 * specification strongly recommends that any value which legitimately
826 * contains trailing spaces be base64-encoded, and any spaces which appear
827 * after the end of non-base64-encoded values may therefore be considered
828 * invalid. If any such trailing spaces are encountered in an LDIF record and
829 * they are not to be stripped, then an {@link LDIFException} will be thrown
830 * for that record.
831 * <BR><BR>
832 * Note that this applies only to spaces after the end of a value, and not to
833 * spaces which may appear at the end of a line for a value that is wrapped
834 * and continued on the next line.
835 *
836 * @return {@code true} if illegal trailing spaces should be stripped off, or
837 * {@code false} if LDIF records containing illegal trailing spaces
838 * should be rejected.
839 *
840 * @deprecated Use the {@link #getTrailingSpaceBehavior} method instead.
841 */
842 @Deprecated()
843 public boolean stripTrailingSpaces()
844 {
845 return (trailingSpaceBehavior == TrailingSpaceBehavior.STRIP);
846 }
847
848
849
850 /**
851 * Specifies whether to strip off any illegal trailing spaces that may appear
852 * in LDIF records (e.g., after an entry DN or attribute value). The LDIF
853 * specification strongly recommends that any value which legitimately
854 * contains trailing spaces be base64-encoded, and any spaces which appear
855 * after the end of non-base64-encoded values may therefore be considered
856 * invalid. If any such trailing spaces are encountered in an LDIF record and
857 * they are not to be stripped, then an {@link LDIFException} will be thrown
858 * for that record.
859 * <BR><BR>
860 * Note that this applies only to spaces after the end of a value, and not to
861 * spaces which may appear at the end of a line for a value that is wrapped
862 * and continued on the next line.
863 *
864 * @param stripTrailingSpaces Indicates whether to strip off any illegal
865 * trailing spaces, or {@code false} if LDIF
866 * records containing them should be rejected.
867 *
868 * @deprecated Use the {@link #setTrailingSpaceBehavior} method instead.
869 */
870 @Deprecated()
871 public void setStripTrailingSpaces(final boolean stripTrailingSpaces)
872 {
873 trailingSpaceBehavior = stripTrailingSpaces
874 ? TrailingSpaceBehavior.STRIP
875 : TrailingSpaceBehavior.REJECT;
876 }
877
878
879
880 /**
881 * Retrieves the behavior that should be exhibited when encountering attribute
882 * values which are not base64-encoded but contain trailing spaces. The LDIF
883 * specification strongly recommends that any value which legitimately
884 * contains trailing spaces be base64-encoded, but the LDAP SDK LDIF parser
885 * may be configured to automatically strip these spaces, to preserve them, or
886 * to reject any entry or change record containing them.
887 *
888 * @return The behavior that should be exhibited when encountering attribute
889 * values which are not base64-encoded but contain trailing spaces.
890 */
891 public TrailingSpaceBehavior getTrailingSpaceBehavior()
892 {
893 return trailingSpaceBehavior;
894 }
895
896
897
898 /**
899 * Specifies the behavior that should be exhibited when encountering attribute
900 * values which are not base64-encoded but contain trailing spaces. The LDIF
901 * specification strongly recommends that any value which legitimately
902 * contains trailing spaces be base64-encoded, but the LDAP SDK LDIF parser
903 * may be configured to automatically strip these spaces, to preserve them, or
904 * to reject any entry or change record containing them.
905 *
906 * @param trailingSpaceBehavior The behavior that should be exhibited when
907 * encountering attribute values which are not
908 * base64-encoded but contain trailing spaces.
909 */
910 public void setTrailingSpaceBehavior(
911 final TrailingSpaceBehavior trailingSpaceBehavior)
912 {
913 this.trailingSpaceBehavior = trailingSpaceBehavior;
914 }
915
916
917
918 /**
919 * Retrieves the base path that will be prepended to relative paths in order
920 * to obtain an absolute path. This will only be used for "file:" URLs that
921 * have paths which do not begin with a slash.
922 *
923 * @return The base path that will be prepended to relative paths in order to
924 * obtain an absolute path.
925 */
926 public String getRelativeBasePath()
927 {
928 return relativeBasePath;
929 }
930
931
932
933 /**
934 * Specifies the base path that will be prepended to relative paths in order
935 * to obtain an absolute path. This will only be used for "file:" URLs that
936 * have paths which do not begin with a space.
937 *
938 * @param relativeBasePath The base path that will be prepended to relative
939 * paths in order to obtain an absolute path.
940 */
941 public void setRelativeBasePath(final String relativeBasePath)
942 {
943 setRelativeBasePath(new File(relativeBasePath));
944 }
945
946
947
948 /**
949 * Specifies the base path that will be prepended to relative paths in order
950 * to obtain an absolute path. This will only be used for "file:" URLs that
951 * have paths which do not begin with a space.
952 *
953 * @param relativeBasePath The base path that will be prepended to relative
954 * paths in order to obtain an absolute path.
955 */
956 public void setRelativeBasePath(final File relativeBasePath)
957 {
958 final String path = relativeBasePath.getAbsolutePath();
959 if (path.endsWith(File.separator))
960 {
961 this.relativeBasePath = path;
962 }
963 else
964 {
965 this.relativeBasePath = path + File.separator;
966 }
967 }
968
969
970
971 /**
972 * Retrieves the schema that will be used when reading LDIF records, if
973 * defined.
974 *
975 * @return The schema that will be used when reading LDIF records, or
976 * {@code null} if no schema should be used and all attributes should
977 * be treated as case-insensitive strings.
978 */
979 public Schema getSchema()
980 {
981 return schema;
982 }
983
984
985
986 /**
987 * Specifies the schema that should be used when reading LDIF records.
988 *
989 * @param schema The schema that should be used when reading LDIF records,
990 * or {@code null} if no schema should be used and all
991 * attributes should be treated as case-insensitive strings.
992 */
993 public void setSchema(final Schema schema)
994 {
995 this.schema = schema;
996 }
997
998
999
1000 /**
1001 * Reads a record from the LDIF source. It may be either an entry or an LDIF
1002 * change record.
1003 *
1004 * @return The record read from the LDIF source, or {@code null} if there are
1005 * no more entries to be read.
1006 *
1007 * @throws IOException If a problem occurs while trying to read from the
1008 * LDIF source.
1009 *
1010 * @throws LDIFException If the data read could not be parsed as an entry or
1011 * an LDIF change record.
1012 */
1013 public LDIFRecord readLDIFRecord()
1014 throws IOException, LDIFException
1015 {
1016 if (isAsync())
1017 {
1018 return readLDIFRecordAsync();
1019 }
1020 else
1021 {
1022 return readLDIFRecordInternal();
1023 }
1024 }
1025
1026
1027
1028 /**
1029 * Reads an entry from the LDIF source.
1030 *
1031 * @return The entry read from the LDIF source, or {@code null} if there are
1032 * no more entries to be read.
1033 *
1034 * @throws IOException If a problem occurs while attempting to read from the
1035 * LDIF source.
1036 *
1037 * @throws LDIFException If the data read could not be parsed as an entry.
1038 */
1039 public Entry readEntry()
1040 throws IOException, LDIFException
1041 {
1042 if (isAsync())
1043 {
1044 return readEntryAsync();
1045 }
1046 else
1047 {
1048 return readEntryInternal();
1049 }
1050 }
1051
1052
1053
1054 /**
1055 * Reads an LDIF change record from the LDIF source. The LDIF record must
1056 * have a changetype.
1057 *
1058 * @return The change record read from the LDIF source, or {@code null} if
1059 * there are no more records to be read.
1060 *
1061 * @throws IOException If a problem occurs while attempting to read from the
1062 * LDIF source.
1063 *
1064 * @throws LDIFException If the data read could not be parsed as an LDIF
1065 * change record.
1066 */
1067 public LDIFChangeRecord readChangeRecord()
1068 throws IOException, LDIFException
1069 {
1070 return readChangeRecord(false);
1071 }
1072
1073
1074
1075 /**
1076 * Reads an LDIF change record from the LDIF source. Optionally, if the LDIF
1077 * record does not have a changetype, then it may be assumed to be an add
1078 * change record.
1079 *
1080 * @param defaultAdd Indicates whether an LDIF record not containing a
1081 * changetype should be retrieved as an add change record.
1082 * If this is {@code false} and the record read does not
1083 * include a changetype, then an {@link LDIFException}
1084 * will be thrown.
1085 *
1086 * @return The change record read from the LDIF source, or {@code null} if
1087 * there are no more records to be read.
1088 *
1089 * @throws IOException If a problem occurs while attempting to read from the
1090 * LDIF source.
1091 *
1092 * @throws LDIFException If the data read could not be parsed as an LDIF
1093 * change record.
1094 */
1095 public LDIFChangeRecord readChangeRecord(final boolean defaultAdd)
1096 throws IOException, LDIFException
1097 {
1098 if (isAsync())
1099 {
1100 return readChangeRecordAsync(defaultAdd);
1101 }
1102 else
1103 {
1104 return readChangeRecordInternal(defaultAdd);
1105 }
1106 }
1107
1108
1109
1110 /**
1111 * Reads the next {@code LDIFRecord}, which was read and parsed by a different
1112 * thread.
1113 *
1114 * @return The next parsed record or {@code null} if there are no more
1115 * records to read.
1116 *
1117 * @throws IOException If IOException was thrown when reading or parsing
1118 * the record.
1119 *
1120 * @throws LDIFException If LDIFException was thrown parsing the record.
1121 */
1122 private LDIFRecord readLDIFRecordAsync()
1123 throws IOException, LDIFException
1124 {
1125 final Result<UnparsedLDIFRecord, LDIFRecord> result =
1126 readLDIFRecordResultAsync();
1127 if (result == null)
1128 {
1129 return null;
1130 }
1131 else
1132 {
1133 return result.getOutput();
1134 }
1135 }
1136
1137
1138
1139 /**
1140 * Reads an entry asynchronously from the LDIF source.
1141 *
1142 * @return The entry read from the LDIF source, or {@code null} if there are
1143 * no more entries to be read.
1144 *
1145 * @throws IOException If a problem occurs while attempting to read from the
1146 * LDIF source.
1147 * @throws LDIFException If the data read could not be parsed as an entry.
1148 */
1149 private Entry readEntryAsync()
1150 throws IOException, LDIFException
1151 {
1152 Result<UnparsedLDIFRecord, LDIFRecord> result = null;
1153 LDIFRecord record = null;
1154 while (record == null)
1155 {
1156 result = readLDIFRecordResultAsync();
1157 if (result == null)
1158 {
1159 return null;
1160 }
1161
1162 record = result.getOutput();
1163
1164 // This is a special value that means we should skip this Entry. We have
1165 // to use something different than null because null means EOF.
1166 if (record == SKIP_ENTRY)
1167 {
1168 record = null;
1169 }
1170 }
1171
1172 if (!(record instanceof Entry))
1173 {
1174 try
1175 {
1176 // Some LDIFChangeRecord can be converted to an Entry. This is really
1177 // an edge case though.
1178 return ((LDIFChangeRecord)record).toEntry();
1179 }
1180 catch (LDIFException e)
1181 {
1182 debugException(e);
1183 final long firstLineNumber = result.getInput().getFirstLineNumber();
1184 throw new LDIFException(e.getExceptionMessage(),
1185 firstLineNumber, true, e);
1186 }
1187 }
1188
1189 return (Entry) record;
1190 }
1191
1192
1193
1194 /**
1195 * Reads an LDIF change record from the LDIF source asynchronously.
1196 * Optionally, if the LDIF record does not have a changetype, then it may be
1197 * assumed to be an add change record.
1198 *
1199 * @param defaultAdd Indicates whether an LDIF record not containing a
1200 * changetype should be retrieved as an add change record.
1201 * If this is {@code false} and the record read does not
1202 * include a changetype, then an {@link LDIFException} will
1203 * be thrown.
1204 *
1205 * @return The change record read from the LDIF source, or {@code null} if
1206 * there are no more records to be read.
1207 *
1208 * @throws IOException If a problem occurs while attempting to read from the
1209 * LDIF source.
1210 * @throws LDIFException If the data read could not be parsed as an LDIF
1211 * change record.
1212 */
1213 private LDIFChangeRecord readChangeRecordAsync(final boolean defaultAdd)
1214 throws IOException, LDIFException
1215 {
1216 final Result<UnparsedLDIFRecord, LDIFRecord> result =
1217 readLDIFRecordResultAsync();
1218 if (result == null)
1219 {
1220 return null;
1221 }
1222
1223 final LDIFRecord record = result.getOutput();
1224 if (record instanceof LDIFChangeRecord)
1225 {
1226 return (LDIFChangeRecord) record;
1227 }
1228 else if (record instanceof Entry)
1229 {
1230 if (defaultAdd)
1231 {
1232 return new LDIFAddChangeRecord((Entry) record);
1233 }
1234 else
1235 {
1236 final long firstLineNumber = result.getInput().getFirstLineNumber();
1237 throw new LDIFException(
1238 ERR_READ_NOT_CHANGE_RECORD.get(firstLineNumber), firstLineNumber,
1239 true);
1240 }
1241 }
1242
1243 throw new AssertionError("LDIFRecords must either be an Entry or an " +
1244 "LDIFChangeRecord");
1245 }
1246
1247
1248
1249 /**
1250 * Reads the next LDIF record, which was read and parsed asynchronously by
1251 * separate threads.
1252 *
1253 * @return The next LDIF record or {@code null} if there are no more records.
1254 *
1255 * @throws IOException If a problem occurs while attempting to read from the
1256 * LDIF source.
1257 *
1258 * @throws LDIFException If the data read could not be parsed as an entry.
1259 */
1260 private Result<UnparsedLDIFRecord, LDIFRecord> readLDIFRecordResultAsync()
1261 throws IOException, LDIFException
1262 {
1263 Result<UnparsedLDIFRecord, LDIFRecord> result = null;
1264
1265 // If the asynchronous reading and parsing is complete, then we don't have
1266 // to block waiting for the next record to show up on the queue. If there
1267 // isn't a record there, then return null (EOF) right away.
1268 if (asyncParsingComplete.get())
1269 {
1270 result = asyncParsedRecords.poll();
1271 }
1272 else
1273 {
1274 try
1275 {
1276 // We probably could just do a asyncParsedRecords.take() here, but
1277 // there are some edge case error scenarios where
1278 // asyncParsingComplete might be set without a special EOF sentinel
1279 // Result enqueued. So to guard against this, we have a very cautious
1280 // polling interval of 1 second. During normal processing, we never
1281 // have to wait for this to expire, when there is something to do
1282 // (like shutdown).
1283 while ((result == null) && (!asyncParsingComplete.get()))
1284 {
1285 result = asyncParsedRecords.poll(1, TimeUnit.SECONDS);
1286 }
1287
1288 // There's a very small chance that we missed the value, so double-check
1289 if (result == null)
1290 {
1291 result = asyncParsedRecords.poll();
1292 }
1293 }
1294 catch (InterruptedException e)
1295 {
1296 debugException(e);
1297 throw new IOException(getExceptionMessage(e));
1298 }
1299 }
1300 if (result == null)
1301 {
1302 return null;
1303 }
1304
1305 rethrow(result.getFailureCause());
1306
1307 // Check if we reached the end of the input
1308 final UnparsedLDIFRecord unparsedRecord = result.getInput();
1309 if (unparsedRecord.isEOF())
1310 {
1311 // This might have been set already by the LineReaderThread, but
1312 // just in case it hasn't gotten to it yet, do so here.
1313 asyncParsingComplete.set(true);
1314
1315 // Enqueue this EOF result again for any other thread that might be
1316 // blocked in asyncParsedRecords.take() even though having multiple
1317 // threads call this method concurrently breaks the contract of this
1318 // class.
1319 try
1320 {
1321 asyncParsedRecords.put(result);
1322 }
1323 catch (InterruptedException e)
1324 {
1325 // We shouldn't ever get interrupted because the put won't ever block.
1326 // Once we are done reading, this is the only item left in the queue,
1327 // so we should always be able to re-enqueue it.
1328 debugException(e);
1329 }
1330 return null;
1331 }
1332
1333 return result;
1334 }
1335
1336
1337
1338 /**
1339 * Indicates whether this LDIF reader was constructed to perform asynchronous
1340 * processing.
1341 *
1342 * @return {@code true} if this LDIFReader was constructed to perform
1343 * asynchronous processing, or {@code false} if not.
1344 */
1345 private boolean isAsync()
1346 {
1347 return isAsync;
1348 }
1349
1350
1351
1352 /**
1353 * If not {@code null}, rethrows the specified Throwable as either an
1354 * IOException or LDIFException.
1355 *
1356 * @param t The exception to rethrow. If it's {@code null}, then nothing
1357 * is thrown.
1358 *
1359 * @throws IOException If t is an IOException or a checked Exception that
1360 * is not an LDIFException.
1361 * @throws LDIFException If t is an LDIFException.
1362 */
1363 static void rethrow(final Throwable t)
1364 throws IOException, LDIFException
1365 {
1366 if (t == null)
1367 {
1368 return;
1369 }
1370
1371 if (t instanceof IOException)
1372 {
1373 throw (IOException) t;
1374 }
1375 else if (t instanceof LDIFException)
1376 {
1377 throw (LDIFException) t;
1378 }
1379 else if (t instanceof RuntimeException)
1380 {
1381 throw (RuntimeException) t;
1382 }
1383 else if (t instanceof Error)
1384 {
1385 throw (Error) t;
1386 }
1387 else
1388 {
1389 throw new IOException(getExceptionMessage(t));
1390 }
1391 }
1392
1393
1394
1395 /**
1396 * Reads a record from the LDIF source. It may be either an entry or an LDIF
1397 * change record.
1398 *
1399 * @return The record read from the LDIF source, or {@code null} if there are
1400 * no more entries to be read.
1401 *
1402 * @throws IOException If a problem occurs while trying to read from the
1403 * LDIF source.
1404 * @throws LDIFException If the data read could not be parsed as an entry or
1405 * an LDIF change record.
1406 */
1407 private LDIFRecord readLDIFRecordInternal()
1408 throws IOException, LDIFException
1409 {
1410 final UnparsedLDIFRecord unparsedRecord = readUnparsedRecord();
1411 return decodeRecord(unparsedRecord, relativeBasePath);
1412 }
1413
1414
1415
1416 /**
1417 * Reads an entry from the LDIF source.
1418 *
1419 * @return The entry read from the LDIF source, or {@code null} if there are
1420 * no more entries to be read.
1421 *
1422 * @throws IOException If a problem occurs while attempting to read from the
1423 * LDIF source.
1424 * @throws LDIFException If the data read could not be parsed as an entry.
1425 */
1426 private Entry readEntryInternal()
1427 throws IOException, LDIFException
1428 {
1429 Entry e = null;
1430 while (e == null)
1431 {
1432 final UnparsedLDIFRecord unparsedRecord = readUnparsedRecord();
1433 if (unparsedRecord.isEOF())
1434 {
1435 return null;
1436 }
1437
1438 e = decodeEntry(unparsedRecord, relativeBasePath);
1439 debugLDIFRead(e);
1440
1441 if (entryTranslator != null)
1442 {
1443 e = entryTranslator.translate(e, unparsedRecord.getFirstLineNumber());
1444 }
1445 }
1446 return e;
1447 }
1448
1449
1450
1451 /**
1452 * Reads an LDIF change record from the LDIF source. Optionally, if the LDIF
1453 * record does not have a changetype, then it may be assumed to be an add
1454 * change record.
1455 *
1456 * @param defaultAdd Indicates whether an LDIF record not containing a
1457 * changetype should be retrieved as an add change record.
1458 * If this is {@code false} and the record read does not
1459 * include a changetype, then an {@link LDIFException} will
1460 * be thrown.
1461 *
1462 * @return The change record read from the LDIF source, or {@code null} if
1463 * there are no more records to be read.
1464 *
1465 * @throws IOException If a problem occurs while attempting to read from the
1466 * LDIF source.
1467 * @throws LDIFException If the data read could not be parsed as an LDIF
1468 * change record.
1469 */
1470 private LDIFChangeRecord readChangeRecordInternal(final boolean defaultAdd)
1471 throws IOException, LDIFException
1472 {
1473 final UnparsedLDIFRecord unparsedRecord = readUnparsedRecord();
1474 if (unparsedRecord.isEOF())
1475 {
1476 return null;
1477 }
1478
1479 final LDIFChangeRecord r =
1480 decodeChangeRecord(unparsedRecord, relativeBasePath, defaultAdd);
1481 debugLDIFRead(r);
1482 return r;
1483 }
1484
1485
1486
1487 /**
1488 * Reads a record (either an entry or a change record) from the LDIF source
1489 * and places it in the line list.
1490 *
1491 * @return The line number for the first line of the entry that was read.
1492 *
1493 * @throws IOException If a problem occurs while attempting to read from the
1494 * LDIF source.
1495 *
1496 * @throws LDIFException If the data read could not be parsed as a valid
1497 * LDIF record.
1498 */
1499 private UnparsedLDIFRecord readUnparsedRecord()
1500 throws IOException, LDIFException
1501 {
1502 final ArrayList<StringBuilder> lineList = new ArrayList<StringBuilder>(20);
1503 boolean lastWasComment = false;
1504 long firstLineNumber = lineNumberCounter + 1;
1505 while (true)
1506 {
1507 final String line = reader.readLine();
1508 lineNumberCounter++;
1509
1510 if (line == null)
1511 {
1512 // We've hit the end of the LDIF source. If we haven't read any entry
1513 // data, then return null. Otherwise, the last entry wasn't followed by
1514 // a blank line, which is OK, and we should decode that entry.
1515 if (lineList.isEmpty())
1516 {
1517 return new UnparsedLDIFRecord(new ArrayList<StringBuilder>(0),
1518 duplicateValueBehavior, trailingSpaceBehavior, schema, -1);
1519 }
1520 else
1521 {
1522 break;
1523 }
1524 }
1525
1526 if (line.length() == 0)
1527 {
1528 // It's a blank line. If we have read entry data, then this signals the
1529 // end of the entry. Otherwise, it's an extra space between entries,
1530 // which is OK.
1531 lastWasComment = false;
1532 if (lineList.isEmpty())
1533 {
1534 firstLineNumber++;
1535 continue;
1536 }
1537 else
1538 {
1539 break;
1540 }
1541 }
1542
1543 if (line.charAt(0) == ' ')
1544 {
1545 // The line starts with a space, which means that it must be a
1546 // continuation of the previous line. This is true even if the last
1547 // line was a comment.
1548 if (lastWasComment)
1549 {
1550 // What we've read is part of a comment, so we don't care about its
1551 // content.
1552 }
1553 else if (lineList.isEmpty())
1554 {
1555 throw new LDIFException(
1556 ERR_READ_UNEXPECTED_FIRST_SPACE.get(lineNumberCounter),
1557 lineNumberCounter, false);
1558 }
1559 else
1560 {
1561 lineList.get(lineList.size() - 1).append(line.substring(1));
1562 lastWasComment = false;
1563 }
1564 }
1565 else if (line.charAt(0) == '#')
1566 {
1567 lastWasComment = true;
1568 }
1569 else
1570 {
1571 // We want to make sure that we skip over the "version:" line if it
1572 // exists, but that should only occur at the beginning of an entry where
1573 // it can't be confused with a possible "version" attribute.
1574 if (lineList.isEmpty() && line.startsWith("version:"))
1575 {
1576 lastWasComment = true;
1577 }
1578 else
1579 {
1580 lineList.add(new StringBuilder(line));
1581 lastWasComment = false;
1582 }
1583 }
1584 }
1585
1586 return new UnparsedLDIFRecord(lineList, duplicateValueBehavior,
1587 trailingSpaceBehavior, schema, firstLineNumber);
1588 }
1589
1590
1591
1592 /**
1593 * Decodes the provided set of LDIF lines as an entry. The provided set of
1594 * lines must contain exactly one entry. Long lines may be wrapped as per the
1595 * LDIF specification, and it is acceptable to have one or more blank lines
1596 * following the entry.
1597 *
1598 * @param ldifLines The set of lines that comprise the LDIF representation
1599 * of the entry. It must not be {@code null} or empty.
1600 *
1601 * @return The entry read from LDIF.
1602 *
1603 * @throws LDIFException If the provided LDIF data cannot be decoded as an
1604 * entry.
1605 */
1606 public static Entry decodeEntry(final String... ldifLines)
1607 throws LDIFException
1608 {
1609 final Entry e = decodeEntry(prepareRecord(DuplicateValueBehavior.STRIP,
1610 TrailingSpaceBehavior.REJECT, null, ldifLines),
1611 DEFAULT_RELATIVE_BASE_PATH);
1612 debugLDIFRead(e);
1613 return e;
1614 }
1615
1616
1617
1618 /**
1619 * Decodes the provided set of LDIF lines as an entry. The provided set of
1620 * lines must contain exactly one entry. Long lines may be wrapped as per the
1621 * LDIF specification, and it is acceptable to have one or more blank lines
1622 * following the entry.
1623 *
1624 * @param ignoreDuplicateValues Indicates whether to ignore duplicate
1625 * attribute values encountered while parsing.
1626 * @param schema The schema to use when parsing the record,
1627 * if applicable.
1628 * @param ldifLines The set of lines that comprise the LDIF
1629 * representation of the entry. It must not be
1630 * {@code null} or empty.
1631 *
1632 * @return The entry read from LDIF.
1633 *
1634 * @throws LDIFException If the provided LDIF data cannot be decoded as an
1635 * entry.
1636 */
1637 public static Entry decodeEntry(final boolean ignoreDuplicateValues,
1638 final Schema schema,
1639 final String... ldifLines)
1640 throws LDIFException
1641 {
1642 final Entry e = decodeEntry(prepareRecord(
1643 (ignoreDuplicateValues
1644 ? DuplicateValueBehavior.STRIP
1645 : DuplicateValueBehavior.REJECT),
1646 TrailingSpaceBehavior.REJECT, schema, ldifLines),
1647 DEFAULT_RELATIVE_BASE_PATH);
1648 debugLDIFRead(e);
1649 return e;
1650 }
1651
1652
1653
1654 /**
1655 * Decodes the provided set of LDIF lines as an LDIF change record. The
1656 * provided set of lines must contain exactly one change record and it must
1657 * include a changetype. Long lines may be wrapped as per the LDIF
1658 * specification, and it is acceptable to have one or more blank lines
1659 * following the entry.
1660 *
1661 * @param ldifLines The set of lines that comprise the LDIF representation
1662 * of the change record. It must not be {@code null} or
1663 * empty.
1664 *
1665 * @return The change record read from LDIF.
1666 *
1667 * @throws LDIFException If the provided LDIF data cannot be decoded as a
1668 * change record.
1669 */
1670 public static LDIFChangeRecord decodeChangeRecord(final String... ldifLines)
1671 throws LDIFException
1672 {
1673 return decodeChangeRecord(false, ldifLines);
1674 }
1675
1676
1677
1678 /**
1679 * Decodes the provided set of LDIF lines as an LDIF change record. The
1680 * provided set of lines must contain exactly one change record. Long lines
1681 * may be wrapped as per the LDIF specification, and it is acceptable to have
1682 * one or more blank lines following the entry.
1683 *
1684 * @param defaultAdd Indicates whether an LDIF record not containing a
1685 * changetype should be retrieved as an add change record.
1686 * If this is {@code false} and the record read does not
1687 * include a changetype, then an {@link LDIFException}
1688 * will be thrown.
1689 * @param ldifLines The set of lines that comprise the LDIF representation
1690 * of the change record. It must not be {@code null} or
1691 * empty.
1692 *
1693 * @return The change record read from LDIF.
1694 *
1695 * @throws LDIFException If the provided LDIF data cannot be decoded as a
1696 * change record.
1697 */
1698 public static LDIFChangeRecord decodeChangeRecord(final boolean defaultAdd,
1699 final String... ldifLines)
1700 throws LDIFException
1701 {
1702 final LDIFChangeRecord r =
1703 decodeChangeRecord(
1704 prepareRecord(DuplicateValueBehavior.STRIP,
1705 TrailingSpaceBehavior.REJECT, null, ldifLines),
1706 DEFAULT_RELATIVE_BASE_PATH, defaultAdd);
1707 debugLDIFRead(r);
1708 return r;
1709 }
1710
1711
1712
1713 /**
1714 * Decodes the provided set of LDIF lines as an LDIF change record. The
1715 * provided set of lines must contain exactly one change record. Long lines
1716 * may be wrapped as per the LDIF specification, and it is acceptable to have
1717 * one or more blank lines following the entry.
1718 *
1719 * @param ignoreDuplicateValues Indicates whether to ignore duplicate
1720 * attribute values encountered while parsing.
1721 * @param schema The schema to use when processing the change
1722 * record, or {@code null} if no schema should
1723 * be used and all values should be treated as
1724 * case-insensitive strings.
1725 * @param defaultAdd Indicates whether an LDIF record not
1726 * containing a changetype should be retrieved
1727 * as an add change record. If this is
1728 * {@code false} and the record read does not
1729 * include a changetype, then an
1730 * {@link LDIFException} will be thrown.
1731 * @param ldifLines The set of lines that comprise the LDIF
1732 * representation of the change record. It
1733 * must not be {@code null} or empty.
1734 *
1735 * @return The change record read from LDIF.
1736 *
1737 * @throws LDIFException If the provided LDIF data cannot be decoded as a
1738 * change record.
1739 */
1740 public static LDIFChangeRecord decodeChangeRecord(
1741 final boolean ignoreDuplicateValues,
1742 final Schema schema,
1743 final boolean defaultAdd,
1744 final String... ldifLines)
1745 throws LDIFException
1746 {
1747 final LDIFChangeRecord r = decodeChangeRecord(
1748 prepareRecord(
1749 (ignoreDuplicateValues
1750 ? DuplicateValueBehavior.STRIP
1751 : DuplicateValueBehavior.REJECT),
1752 TrailingSpaceBehavior.REJECT, schema, ldifLines),
1753 DEFAULT_RELATIVE_BASE_PATH, defaultAdd);
1754 debugLDIFRead(r);
1755 return r;
1756 }
1757
1758
1759
1760 /**
1761 * Parses the provided set of lines into a list of {@code StringBuilder}
1762 * objects suitable for decoding into an entry or LDIF change record.
1763 * Comments will be ignored and wrapped lines will be unwrapped.
1764 *
1765 * @param duplicateValueBehavior The behavior that should be exhibited if
1766 * the LDIF reader encounters an entry with
1767 * duplicate values.
1768 * @param trailingSpaceBehavior The behavior that should be exhibited when
1769 * encountering attribute values which are not
1770 * base64-encoded but contain trailing spaces.
1771 * @param schema The schema to use when parsing the record,
1772 * if applicable.
1773 * @param ldifLines The set of lines that comprise the record
1774 * to decode. It must not be {@code null} or
1775 * empty.
1776 *
1777 * @return The prepared list of {@code StringBuilder} objects ready to be
1778 * decoded.
1779 *
1780 * @throws LDIFException If the provided lines do not contain valid LDIF
1781 * content.
1782 */
1783 private static UnparsedLDIFRecord prepareRecord(
1784 final DuplicateValueBehavior duplicateValueBehavior,
1785 final TrailingSpaceBehavior trailingSpaceBehavior,
1786 final Schema schema, final String... ldifLines)
1787 throws LDIFException
1788 {
1789 ensureNotNull(ldifLines);
1790 ensureFalse(ldifLines.length == 0,
1791 "LDIFReader.prepareRecord.ldifLines must not be empty.");
1792
1793 boolean lastWasComment = false;
1794 final ArrayList<StringBuilder> lineList =
1795 new ArrayList<StringBuilder>(ldifLines.length);
1796 for (int i=0; i < ldifLines.length; i++)
1797 {
1798 final String line = ldifLines[i];
1799 if (line.length() == 0)
1800 {
1801 // This is only acceptable if there are no more non-empty lines in the
1802 // array.
1803 for (int j=i+1; j < ldifLines.length; j++)
1804 {
1805 if (ldifLines[j].length() > 0)
1806 {
1807 throw new LDIFException(ERR_READ_UNEXPECTED_BLANK.get(i), i, true,
1808 ldifLines, null);
1809 }
1810
1811 // If we've gotten here, then we know that we're at the end of the
1812 // entry. If we have read data, then we can decode it as an entry.
1813 // Otherwise, there was no real data in the provided LDIF lines.
1814 if (lineList.isEmpty())
1815 {
1816 throw new LDIFException(ERR_READ_ONLY_BLANKS.get(), 0, true,
1817 ldifLines, null);
1818 }
1819 else
1820 {
1821 return new UnparsedLDIFRecord(lineList, duplicateValueBehavior,
1822 trailingSpaceBehavior, schema, 0);
1823 }
1824 }
1825 }
1826
1827 if (line.charAt(0) == ' ')
1828 {
1829 if (i > 0)
1830 {
1831 if (! lastWasComment)
1832 {
1833 lineList.get(lineList.size() - 1).append(line.substring(1));
1834 }
1835 }
1836 else
1837 {
1838 throw new LDIFException(
1839 ERR_READ_UNEXPECTED_FIRST_SPACE_NO_NUMBER.get(), 0,
1840 true, ldifLines, null);
1841 }
1842 }
1843 else if (line.charAt(0) == '#')
1844 {
1845 lastWasComment = true;
1846 }
1847 else
1848 {
1849 lineList.add(new StringBuilder(line));
1850 lastWasComment = false;
1851 }
1852 }
1853
1854 if (lineList.isEmpty())
1855 {
1856 throw new LDIFException(ERR_READ_NO_DATA.get(), 0, true, ldifLines, null);
1857 }
1858 else
1859 {
1860 return new UnparsedLDIFRecord(lineList, duplicateValueBehavior,
1861 trailingSpaceBehavior, schema, 0);
1862 }
1863 }
1864
1865
1866
1867 /**
1868 * Decodes the unparsed record that was read from the LDIF source. It may be
1869 * either an entry or an LDIF change record.
1870 *
1871 * @param unparsedRecord The unparsed LDIF record that was read from the
1872 * input. It must not be {@code null} or empty.
1873 * @param relativeBasePath The base path that will be prepended to relative
1874 * paths in order to obtain an absolute path.
1875 *
1876 * @return The parsed record, or {@code null} if there are no more entries to
1877 * be read.
1878 *
1879 * @throws LDIFException If the data read could not be parsed as an entry or
1880 * an LDIF change record.
1881 */
1882 private static LDIFRecord decodeRecord(
1883 final UnparsedLDIFRecord unparsedRecord,
1884 final String relativeBasePath)
1885 throws LDIFException
1886 {
1887 // If there was an error reading from the input, then we rethrow it here.
1888 final Exception readError = unparsedRecord.getFailureCause();
1889 if (readError != null)
1890 {
1891 if (readError instanceof LDIFException)
1892 {
1893 // If the error was an LDIFException, which will normally be the case,
1894 // then rethrow it with all of the same state. We could just
1895 // throw (LDIFException) readError;
1896 // but that's considered bad form.
1897 final LDIFException ldifEx = (LDIFException) readError;
1898 throw new LDIFException(ldifEx.getMessage(),
1899 ldifEx.getLineNumber(),
1900 ldifEx.mayContinueReading(),
1901 ldifEx.getDataLines(),
1902 ldifEx.getCause());
1903 }
1904 else
1905 {
1906 throw new LDIFException(getExceptionMessage(readError),
1907 -1, true, readError);
1908 }
1909 }
1910
1911 if (unparsedRecord.isEOF())
1912 {
1913 return null;
1914 }
1915
1916 final ArrayList<StringBuilder> lineList = unparsedRecord.getLineList();
1917 if (unparsedRecord.getLineList() == null)
1918 {
1919 return null; // We can get here if there was an error reading the lines.
1920 }
1921
1922 final LDIFRecord r;
1923 if ((lineList.size() > 1) &&
1924 toLowerCase(lineList.get(1).toString()).startsWith("changetype:"))
1925 {
1926 r = decodeChangeRecord(unparsedRecord, relativeBasePath, false);
1927 }
1928 else
1929 {
1930 r = decodeEntry(unparsedRecord, relativeBasePath);
1931 }
1932
1933 debugLDIFRead(r);
1934 return r;
1935 }
1936
1937
1938
1939 /**
1940 * Decodes the provided set of LDIF lines as an entry. The provided list must
1941 * not contain any blank lines or comments, and lines are not allowed to be
1942 * wrapped.
1943 *
1944 * @param unparsedRecord The unparsed LDIF record that was read from the
1945 * input. It must not be {@code null} or empty.
1946 * @param relativeBasePath The base path that will be prepended to relative
1947 * paths in order to obtain an absolute path.
1948 *
1949 * @return The entry read from LDIF.
1950 *
1951 * @throws LDIFException If the provided LDIF data cannot be read as an
1952 * entry.
1953 */
1954 private static Entry decodeEntry(final UnparsedLDIFRecord unparsedRecord,
1955 final String relativeBasePath)
1956 throws LDIFException
1957 {
1958 final ArrayList<StringBuilder> ldifLines = unparsedRecord.getLineList();
1959 final long firstLineNumber = unparsedRecord.getFirstLineNumber();
1960
1961 final Iterator<StringBuilder> iterator = ldifLines.iterator();
1962
1963 // The first line must be the entry DN, and it must start with "dn:".
1964 final StringBuilder line = iterator.next();
1965 handleTrailingSpaces(line, null, firstLineNumber,
1966 unparsedRecord.getTrailingSpaceBehavior());
1967 final int colonPos = line.indexOf(":");
1968 if ((colonPos < 0) ||
1969 (! line.substring(0, colonPos).equalsIgnoreCase("dn")))
1970 {
1971 throw new LDIFException(
1972 ERR_READ_DN_LINE_DOESNT_START_WITH_DN.get(firstLineNumber),
1973 firstLineNumber, true, ldifLines, null);
1974 }
1975
1976 final String dn;
1977 final int length = line.length();
1978 if (length == (colonPos+1))
1979 {
1980 // The colon was the last character on the line. This is acceptable and
1981 // indicates that the entry has the null DN.
1982 dn = "";
1983 }
1984 else if (line.charAt(colonPos+1) == ':')
1985 {
1986 // Skip over any spaces leading up to the value, and then the rest of the
1987 // string is the base64-encoded DN.
1988 int pos = colonPos+2;
1989 while ((pos < length) && (line.charAt(pos) == ' '))
1990 {
1991 pos++;
1992 }
1993
1994 try
1995 {
1996 final byte[] dnBytes = Base64.decode(line.substring(pos));
1997 dn = new String(dnBytes, "UTF-8");
1998 }
1999 catch (final ParseException pe)
2000 {
2001 debugException(pe);
2002 throw new LDIFException(
2003 ERR_READ_CANNOT_BASE64_DECODE_DN.get(firstLineNumber,
2004 pe.getMessage()),
2005 firstLineNumber, true, ldifLines, pe);
2006 }
2007 catch (final Exception e)
2008 {
2009 debugException(e);
2010 throw new LDIFException(
2011 ERR_READ_CANNOT_BASE64_DECODE_DN.get(firstLineNumber, e),
2012 firstLineNumber, true, ldifLines, e);
2013 }
2014 }
2015 else
2016 {
2017 // Skip over any spaces leading up to the value, and then the rest of the
2018 // string is the DN.
2019 int pos = colonPos+1;
2020 while ((pos < length) && (line.charAt(pos) == ' '))
2021 {
2022 pos++;
2023 }
2024
2025 dn = line.substring(pos);
2026 }
2027
2028
2029 // The remaining lines must be the attributes for the entry. However, we
2030 // will allow the case in which an entry does not have any attributes, to be
2031 // able to support reading search result entries in which no attributes were
2032 // returned.
2033 if (! iterator.hasNext())
2034 {
2035 return new Entry(dn, unparsedRecord.getSchema());
2036 }
2037
2038 return new Entry(dn, unparsedRecord.getSchema(),
2039 parseAttributes(dn, unparsedRecord.getDuplicateValueBehavior(),
2040 unparsedRecord.getTrailingSpaceBehavior(),
2041 unparsedRecord.getSchema(), ldifLines, iterator, relativeBasePath,
2042 firstLineNumber));
2043 }
2044
2045
2046
2047 /**
2048 * Decodes the provided set of LDIF lines as a change record. The provided
2049 * list must not contain any blank lines or comments, and lines are not
2050 * allowed to be wrapped.
2051 *
2052 * @param unparsedRecord The unparsed LDIF record that was read from the
2053 * input. It must not be {@code null} or empty.
2054 * @param relativeBasePath The base path that will be prepended to relative
2055 * paths in order to obtain an absolute path.
2056 * @param defaultAdd Indicates whether an LDIF record not containing a
2057 * changetype should be retrieved as an add change
2058 * record. If this is {@code false} and the record
2059 * read does not include a changetype, then an
2060 * {@link LDIFException} will be thrown.
2061 *
2062 * @return The change record read from LDIF.
2063 *
2064 * @throws LDIFException If the provided LDIF data cannot be decoded as a
2065 * change record.
2066 */
2067 private static LDIFChangeRecord decodeChangeRecord(
2068 final UnparsedLDIFRecord unparsedRecord,
2069 final String relativeBasePath,
2070 final boolean defaultAdd)
2071 throws LDIFException
2072 {
2073 final ArrayList<StringBuilder> ldifLines = unparsedRecord.getLineList();
2074 final long firstLineNumber = unparsedRecord.getFirstLineNumber();
2075
2076 final Iterator<StringBuilder> iterator = ldifLines.iterator();
2077
2078 // The first line must be the entry DN, and it must start with "dn:".
2079 StringBuilder line = iterator.next();
2080 handleTrailingSpaces(line, null, firstLineNumber,
2081 unparsedRecord.getTrailingSpaceBehavior());
2082 int colonPos = line.indexOf(":");
2083 if ((colonPos < 0) ||
2084 (! line.substring(0, colonPos).equalsIgnoreCase("dn")))
2085 {
2086 throw new LDIFException(
2087 ERR_READ_CR_DN_LINE_DOESNT_START_WITH_DN.get(firstLineNumber),
2088 firstLineNumber, true, ldifLines, null);
2089 }
2090
2091 final String dn;
2092 int length = line.length();
2093 if (length == (colonPos+1))
2094 {
2095 // The colon was the last character on the line. This is acceptable and
2096 // indicates that the entry has the null DN.
2097 dn = "";
2098 }
2099 else if (line.charAt(colonPos+1) == ':')
2100 {
2101 // Skip over any spaces leading up to the value, and then the rest of the
2102 // string is the base64-encoded DN.
2103 int pos = colonPos+2;
2104 while ((pos < length) && (line.charAt(pos) == ' '))
2105 {
2106 pos++;
2107 }
2108
2109 try
2110 {
2111 final byte[] dnBytes = Base64.decode(line.substring(pos));
2112 dn = new String(dnBytes, "UTF-8");
2113 }
2114 catch (final ParseException pe)
2115 {
2116 debugException(pe);
2117 throw new LDIFException(
2118 ERR_READ_CR_CANNOT_BASE64_DECODE_DN.get(firstLineNumber,
2119 pe.getMessage()),
2120 firstLineNumber, true, ldifLines, pe);
2121 }
2122 catch (final Exception e)
2123 {
2124 debugException(e);
2125 throw new LDIFException(
2126 ERR_READ_CR_CANNOT_BASE64_DECODE_DN.get(firstLineNumber,
2127 e),
2128 firstLineNumber, true, ldifLines, e);
2129 }
2130 }
2131 else
2132 {
2133 // Skip over any spaces leading up to the value, and then the rest of the
2134 // string is the DN.
2135 int pos = colonPos+1;
2136 while ((pos < length) && (line.charAt(pos) == ' '))
2137 {
2138 pos++;
2139 }
2140
2141 dn = line.substring(pos);
2142 }
2143
2144
2145 // The second line must be the change type, and it must start with
2146 // "changetype:".
2147 if (! iterator.hasNext())
2148 {
2149 throw new LDIFException(ERR_READ_CR_TOO_SHORT.get(firstLineNumber),
2150 firstLineNumber, true, ldifLines, null);
2151 }
2152
2153
2154 // If defaultAdd is true, then the change record may or may not have a
2155 // changetype. If it is false, then the record must have a changetype.
2156 final String changeType;
2157 if (defaultAdd &&
2158 (! toLowerCase(ldifLines.get(1).toString()).startsWith("changetype:")))
2159 {
2160 changeType = "add";
2161 }
2162 else
2163 {
2164 line = iterator.next();
2165 handleTrailingSpaces(line, dn, firstLineNumber,
2166 unparsedRecord.getTrailingSpaceBehavior());
2167 colonPos = line.indexOf(":");
2168 if ((colonPos < 0) ||
2169 (! line.substring(0, colonPos).equalsIgnoreCase("changetype")))
2170 {
2171 throw new LDIFException(
2172 ERR_READ_CR_CT_LINE_DOESNT_START_WITH_CT.get(firstLineNumber),
2173 firstLineNumber, true, ldifLines, null);
2174 }
2175
2176 length = line.length();
2177 if (length == (colonPos+1))
2178 {
2179 // The colon was the last character on the line. This is not
2180 // acceptable.
2181 throw new LDIFException(
2182 ERR_READ_CT_LINE_NO_CT_VALUE.get(firstLineNumber), firstLineNumber,
2183 true, ldifLines, null);
2184 }
2185 else if (line.charAt(colonPos+1) == ':')
2186 {
2187 // Skip over any spaces leading up to the value, and then the rest of
2188 // the string is the base64-encoded changetype. This is unusual and
2189 // unnecessary, but is nevertheless acceptable.
2190 int pos = colonPos+2;
2191 while ((pos < length) && (line.charAt(pos) == ' '))
2192 {
2193 pos++;
2194 }
2195
2196 try
2197 {
2198 final byte[] changeTypeBytes = Base64.decode(line.substring(pos));
2199 changeType = new String(changeTypeBytes, "UTF-8");
2200 }
2201 catch (final ParseException pe)
2202 {
2203 debugException(pe);
2204 throw new LDIFException(
2205 ERR_READ_CANNOT_BASE64_DECODE_CT.get(firstLineNumber,
2206 pe.getMessage()),
2207 firstLineNumber, true, ldifLines, pe);
2208 }
2209 catch (final Exception e)
2210 {
2211 debugException(e);
2212 throw new LDIFException(
2213 ERR_READ_CANNOT_BASE64_DECODE_CT.get(firstLineNumber, e),
2214 firstLineNumber, true, ldifLines, e);
2215 }
2216 }
2217 else
2218 {
2219 // Skip over any spaces leading up to the value, and then the rest of
2220 // the string is the changetype.
2221 int pos = colonPos+1;
2222 while ((pos < length) && (line.charAt(pos) == ' '))
2223 {
2224 pos++;
2225 }
2226
2227 changeType = line.substring(pos);
2228 }
2229 }
2230
2231
2232 // Make sure that the change type is acceptable and then decode the rest of
2233 // the change record accordingly.
2234 final String lowerChangeType = toLowerCase(changeType);
2235 if (lowerChangeType.equals("add"))
2236 {
2237 // There must be at least one more line. If not, then that's an error.
2238 // Otherwise, parse the rest of the data as attribute-value pairs.
2239 if (iterator.hasNext())
2240 {
2241 final Collection<Attribute> attrs =
2242 parseAttributes(dn, unparsedRecord.getDuplicateValueBehavior(),
2243 unparsedRecord.getTrailingSpaceBehavior(),
2244 unparsedRecord.getSchema(), ldifLines, iterator,
2245 relativeBasePath, firstLineNumber);
2246 final Attribute[] attributes = new Attribute[attrs.size()];
2247 final Iterator<Attribute> attrIterator = attrs.iterator();
2248 for (int i=0; i < attributes.length; i++)
2249 {
2250 attributes[i] = attrIterator.next();
2251 }
2252
2253 return new LDIFAddChangeRecord(dn, attributes);
2254 }
2255 else
2256 {
2257 throw new LDIFException(ERR_READ_CR_NO_ATTRIBUTES.get(firstLineNumber),
2258 firstLineNumber, true, ldifLines, null);
2259 }
2260 }
2261 else if (lowerChangeType.equals("delete"))
2262 {
2263 // There shouldn't be any more data. If there is, then that's an error.
2264 // Otherwise, we can just return the delete change record with what we
2265 // already know.
2266 if (iterator.hasNext())
2267 {
2268 throw new LDIFException(
2269 ERR_READ_CR_EXTRA_DELETE_DATA.get(firstLineNumber),
2270 firstLineNumber, true, ldifLines, null);
2271 }
2272 else
2273 {
2274 return new LDIFDeleteChangeRecord(dn);
2275 }
2276 }
2277 else if (lowerChangeType.equals("modify"))
2278 {
2279 // There must be at least one more line. If not, then that's an error.
2280 // Otherwise, parse the rest of the data as a set of modifications.
2281 if (iterator.hasNext())
2282 {
2283 final Modification[] mods = parseModifications(dn,
2284 unparsedRecord.getTrailingSpaceBehavior(), ldifLines, iterator,
2285 firstLineNumber);
2286 return new LDIFModifyChangeRecord(dn, mods);
2287 }
2288 else
2289 {
2290 throw new LDIFException(ERR_READ_CR_NO_MODS.get(firstLineNumber),
2291 firstLineNumber, true, ldifLines, null);
2292 }
2293 }
2294 else if (lowerChangeType.equals("moddn") ||
2295 lowerChangeType.equals("modrdn"))
2296 {
2297 // There must be at least one more line. If not, then that's an error.
2298 // Otherwise, parse the rest of the data as a set of modifications.
2299 if (iterator.hasNext())
2300 {
2301 return parseModifyDNChangeRecord(ldifLines, iterator, dn,
2302 unparsedRecord.getTrailingSpaceBehavior(), firstLineNumber);
2303 }
2304 else
2305 {
2306 throw new LDIFException(ERR_READ_CR_NO_NEWRDN.get(firstLineNumber),
2307 firstLineNumber, true, ldifLines, null);
2308 }
2309 }
2310 else
2311 {
2312 throw new LDIFException(ERR_READ_CR_INVALID_CT.get(changeType,
2313 firstLineNumber),
2314 firstLineNumber, true, ldifLines, null);
2315 }
2316 }
2317
2318
2319
2320 /**
2321 * Parses the data available through the provided iterator as a collection of
2322 * attributes suitable for use in an entry or an add change record.
2323 *
2324 * @param dn The DN of the record being read.
2325 * @param duplicateValueBehavior The behavior that should be exhibited if
2326 * the LDIF reader encounters an entry with
2327 * duplicate values.
2328 * @param trailingSpaceBehavior The behavior that should be exhibited when
2329 * encountering attribute values which are not
2330 * base64-encoded but contain trailing spaces.
2331 * @param schema The schema to use when parsing the
2332 * attributes, or {@code null} if none is
2333 * needed.
2334 * @param ldifLines The lines that comprise the LDIF
2335 * representation of the full record being
2336 * parsed.
2337 * @param iterator The iterator to use to access the attribute
2338 * lines.
2339 * @param relativeBasePath The base path that will be prepended to
2340 * relative paths in order to obtain an
2341 * absolute path.
2342 * @param firstLineNumber The line number for the start of the
2343 * record.
2344 *
2345 * @return The collection of attributes that were read.
2346 *
2347 * @throws LDIFException If the provided LDIF data cannot be decoded as a
2348 * set of attributes.
2349 */
2350 private static ArrayList<Attribute> parseAttributes(final String dn,
2351 final DuplicateValueBehavior duplicateValueBehavior,
2352 final TrailingSpaceBehavior trailingSpaceBehavior, final Schema schema,
2353 final ArrayList<StringBuilder> ldifLines,
2354 final Iterator<StringBuilder> iterator, final String relativeBasePath,
2355 final long firstLineNumber)
2356 throws LDIFException
2357 {
2358 final LinkedHashMap<String,Object> attributes =
2359 new LinkedHashMap<String,Object>(ldifLines.size());
2360 while (iterator.hasNext())
2361 {
2362 final StringBuilder line = iterator.next();
2363 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior);
2364 final int colonPos = line.indexOf(":");
2365 if (colonPos <= 0)
2366 {
2367 throw new LDIFException(ERR_READ_NO_ATTR_COLON.get(firstLineNumber),
2368 firstLineNumber, true, ldifLines, null);
2369 }
2370
2371 final String attributeName = line.substring(0, colonPos);
2372 final String lowerName = toLowerCase(attributeName);
2373
2374 final MatchingRule matchingRule;
2375 if (schema == null)
2376 {
2377 matchingRule = CaseIgnoreStringMatchingRule.getInstance();
2378 }
2379 else
2380 {
2381 matchingRule =
2382 MatchingRule.selectEqualityMatchingRule(attributeName, schema);
2383 }
2384
2385 Attribute attr;
2386 final LDIFAttribute ldifAttr;
2387 final Object attrObject = attributes.get(lowerName);
2388 if (attrObject == null)
2389 {
2390 attr = null;
2391 ldifAttr = null;
2392 }
2393 else
2394 {
2395 if (attrObject instanceof Attribute)
2396 {
2397 attr = (Attribute) attrObject;
2398 ldifAttr = new LDIFAttribute(attr.getName(), matchingRule,
2399 attr.getRawValues()[0]);
2400 attributes.put(lowerName, ldifAttr);
2401 }
2402 else
2403 {
2404 attr = null;
2405 ldifAttr = (LDIFAttribute) attrObject;
2406 }
2407 }
2408
2409 final int length = line.length();
2410 if (length == (colonPos+1))
2411 {
2412 // This means that the attribute has a zero-length value, which is
2413 // acceptable.
2414 if (attrObject == null)
2415 {
2416 attr = new Attribute(attributeName, "");
2417 attributes.put(lowerName, attr);
2418 }
2419 else
2420 {
2421 try
2422 {
2423 if (! ldifAttr.addValue(new ASN1OctetString(),
2424 duplicateValueBehavior))
2425 {
2426 if (duplicateValueBehavior != DuplicateValueBehavior.STRIP)
2427 {
2428 throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn,
2429 firstLineNumber, attributeName), firstLineNumber, true,
2430 ldifLines, null);
2431 }
2432 }
2433 }
2434 catch (LDAPException le)
2435 {
2436 throw new LDIFException(ERR_READ_VALUE_SYNTAX_VIOLATION.get(dn,
2437 firstLineNumber, attributeName, getExceptionMessage(le)),
2438 firstLineNumber, true, ldifLines, le);
2439 }
2440 }
2441 }
2442 else if (line.charAt(colonPos+1) == ':')
2443 {
2444 // Skip over any spaces leading up to the value, and then the rest of
2445 // the string is the base64-encoded attribute value.
2446 int pos = colonPos+2;
2447 while ((pos < length) && (line.charAt(pos) == ' '))
2448 {
2449 pos++;
2450 }
2451
2452 try
2453 {
2454 final byte[] valueBytes = Base64.decode(line.substring(pos));
2455 if (attrObject == null)
2456 {
2457 attr = new Attribute(attributeName, valueBytes);
2458 attributes.put(lowerName, attr);
2459 }
2460 else
2461 {
2462 try
2463 {
2464 if (! ldifAttr.addValue(new ASN1OctetString(valueBytes),
2465 duplicateValueBehavior))
2466 {
2467 if (duplicateValueBehavior != DuplicateValueBehavior.STRIP)
2468 {
2469 throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn,
2470 firstLineNumber, attributeName), firstLineNumber, true,
2471 ldifLines, null);
2472 }
2473 }
2474 }
2475 catch (LDAPException le)
2476 {
2477 throw new LDIFException(ERR_READ_VALUE_SYNTAX_VIOLATION.get(dn,
2478 firstLineNumber, attributeName, getExceptionMessage(le)),
2479 firstLineNumber, true, ldifLines, le);
2480 }
2481 }
2482 }
2483 catch (final ParseException pe)
2484 {
2485 debugException(pe);
2486 throw new LDIFException(ERR_READ_CANNOT_BASE64_DECODE_ATTR.get(
2487 attributeName, firstLineNumber,
2488 pe.getMessage()),
2489 firstLineNumber, true, ldifLines, pe);
2490 }
2491 }
2492 else if (line.charAt(colonPos+1) == '<')
2493 {
2494 // Skip over any spaces leading up to the value, and then the rest of
2495 // the string is a URL that indicates where to get the real content.
2496 // At the present time, we'll only support the file URLs.
2497 int pos = colonPos+2;
2498 while ((pos < length) && (line.charAt(pos) == ' '))
2499 {
2500 pos++;
2501 }
2502
2503 final String path;
2504 final String urlString = line.substring(pos);
2505 final String lowerURLString = toLowerCase(urlString);
2506 if (lowerURLString.startsWith("file:/"))
2507 {
2508 pos = 6;
2509 while ((pos < urlString.length()) && (urlString.charAt(pos) == '/'))
2510 {
2511 pos++;
2512 }
2513
2514 path = urlString.substring(pos-1);
2515 }
2516 else if (lowerURLString.startsWith("file:"))
2517 {
2518 // A file: URL that doesn't include a slash will be interpreted as a
2519 // relative path.
2520 path = relativeBasePath + urlString.substring(5);
2521 }
2522 else
2523 {
2524 throw new LDIFException(ERR_READ_URL_INVALID_SCHEME.get(attributeName,
2525 urlString, firstLineNumber),
2526 firstLineNumber, true, ldifLines, null);
2527 }
2528
2529 try
2530 {
2531 final File f = new File(path);
2532 if (! f.exists())
2533 {
2534 throw new LDIFException(ERR_READ_URL_NO_SUCH_FILE.get(attributeName,
2535 urlString, firstLineNumber,
2536 f.getAbsolutePath()),
2537 firstLineNumber, true, ldifLines, null);
2538 }
2539
2540 // In order to conserve memory, we'll only allow values to be read
2541 // from files no larger than 10 megabytes.
2542 final long fileSize = f.length();
2543 if (fileSize > (10 * 1024 * 1024))
2544 {
2545 throw new LDIFException(ERR_READ_URL_FILE_TOO_LARGE.get(
2546 attributeName, urlString,
2547 firstLineNumber, f.getAbsolutePath(),
2548 (10*1024*1024)),
2549 firstLineNumber, true, ldifLines, null);
2550 }
2551
2552 int fileBytesRead = 0;
2553 int fileBytesRemaining = (int) fileSize;
2554 final byte[] fileData = new byte[(int) fileSize];
2555 final FileInputStream fis = new FileInputStream(f);
2556 try
2557 {
2558 while (fileBytesRead < fileSize)
2559 {
2560 final int bytesRead =
2561 fis.read(fileData, fileBytesRead, fileBytesRemaining);
2562 if (bytesRead < 0)
2563 {
2564 // We hit the end of the file before we expected to. This
2565 // shouldn't happen unless the file size changed since we first
2566 // looked at it, which we won't allow.
2567 throw new LDIFException(ERR_READ_URL_FILE_SIZE_CHANGED.get(
2568 attributeName, urlString,
2569 firstLineNumber,
2570 f.getAbsolutePath()),
2571 firstLineNumber, true, ldifLines, null);
2572 }
2573
2574 fileBytesRead += bytesRead;
2575 fileBytesRemaining -= bytesRead;
2576 }
2577
2578 if (fis.read() != -1)
2579 {
2580 // There is still more data to read. This shouldn't happen unless
2581 // the file size changed since we first looked at it, which we
2582 // won't allow.
2583 throw new LDIFException(ERR_READ_URL_FILE_SIZE_CHANGED.get(
2584 attributeName, urlString,
2585 firstLineNumber,
2586 f.getAbsolutePath()),
2587 firstLineNumber, true, ldifLines, null);
2588 }
2589 }
2590 finally
2591 {
2592 fis.close();
2593 }
2594
2595 if (attrObject == null)
2596 {
2597 attr = new Attribute(attributeName, fileData);
2598 attributes.put(lowerName, attr);
2599 }
2600 else
2601 {
2602 if (! ldifAttr.addValue(new ASN1OctetString(fileData),
2603 duplicateValueBehavior))
2604 {
2605 if (duplicateValueBehavior != DuplicateValueBehavior.STRIP)
2606 {
2607 throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn,
2608 firstLineNumber, attributeName), firstLineNumber, true,
2609 ldifLines, null);
2610 }
2611 }
2612 }
2613 }
2614 catch (LDIFException le)
2615 {
2616 debugException(le);
2617 throw le;
2618 }
2619 catch (Exception e)
2620 {
2621 debugException(e);
2622 throw new LDIFException(ERR_READ_URL_EXCEPTION.get(attributeName,
2623 urlString, firstLineNumber, e),
2624 firstLineNumber, true, ldifLines, e);
2625 }
2626 }
2627 else
2628 {
2629 // Skip over any spaces leading up to the value, and then the rest of
2630 // the string is the value.
2631 int pos = colonPos+1;
2632 while ((pos < length) && (line.charAt(pos) == ' '))
2633 {
2634 pos++;
2635 }
2636
2637 final String valueString = line.substring(pos);
2638 if (attrObject == null)
2639 {
2640 attr = new Attribute(attributeName, valueString);
2641 attributes.put(lowerName, attr);
2642 }
2643 else
2644 {
2645 try
2646 {
2647 if (! ldifAttr.addValue(new ASN1OctetString(valueString),
2648 duplicateValueBehavior))
2649 {
2650 if (duplicateValueBehavior != DuplicateValueBehavior.STRIP)
2651 {
2652 throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn,
2653 firstLineNumber, attributeName), firstLineNumber, true,
2654 ldifLines, null);
2655 }
2656 }
2657 }
2658 catch (LDAPException le)
2659 {
2660 throw new LDIFException(ERR_READ_VALUE_SYNTAX_VIOLATION.get(dn,
2661 firstLineNumber, attributeName, getExceptionMessage(le)),
2662 firstLineNumber, true, ldifLines, le);
2663 }
2664 }
2665 }
2666 }
2667
2668 final ArrayList<Attribute> attrList =
2669 new ArrayList<Attribute>(attributes.size());
2670 for (final Object o : attributes.values())
2671 {
2672 if (o instanceof Attribute)
2673 {
2674 attrList.add((Attribute) o);
2675 }
2676 else
2677 {
2678 attrList.add(((LDIFAttribute) o).toAttribute());
2679 }
2680 }
2681
2682 return attrList;
2683 }
2684
2685
2686
2687 /**
2688 * Parses the data available through the provided iterator into an array of
2689 * modifications suitable for use in a modify change record.
2690 *
2691 * @param dn The DN of the entry being parsed.
2692 * @param trailingSpaceBehavior The behavior that should be exhibited when
2693 * encountering attribute values which are not
2694 * base64-encoded but contain trailing spaces.
2695 * @param ldifLines The lines that comprise the LDIF
2696 * representation of the full record being
2697 * parsed.
2698 * @param iterator The iterator to use to access the
2699 * modification data.
2700 * @param firstLineNumber The line number for the start of the record.
2701 *
2702 * @return An array containing the modifications that were read.
2703 *
2704 * @throws LDIFException If the provided LDIF data cannot be decoded as a
2705 * set of modifications.
2706 */
2707 private static Modification[] parseModifications(final String dn,
2708 final TrailingSpaceBehavior trailingSpaceBehavior,
2709 final ArrayList<StringBuilder> ldifLines,
2710 final Iterator<StringBuilder> iterator, final long firstLineNumber)
2711 throws LDIFException
2712 {
2713 final ArrayList<Modification> modList =
2714 new ArrayList<Modification>(ldifLines.size());
2715
2716 while (iterator.hasNext())
2717 {
2718 // The first line must start with "add:", "delete:", "replace:", or
2719 // "increment:" followed by an attribute name.
2720 StringBuilder line = iterator.next();
2721 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior);
2722 int colonPos = line.indexOf(":");
2723 if (colonPos < 0)
2724 {
2725 throw new LDIFException(ERR_READ_MOD_CR_NO_MODTYPE.get(firstLineNumber),
2726 firstLineNumber, true, ldifLines, null);
2727 }
2728
2729 final ModificationType modType;
2730 final String modTypeStr = toLowerCase(line.substring(0, colonPos));
2731 if (modTypeStr.equals("add"))
2732 {
2733 modType = ModificationType.ADD;
2734 }
2735 else if (modTypeStr.equals("delete"))
2736 {
2737 modType = ModificationType.DELETE;
2738 }
2739 else if (modTypeStr.equals("replace"))
2740 {
2741 modType = ModificationType.REPLACE;
2742 }
2743 else if (modTypeStr.equals("increment"))
2744 {
2745 modType = ModificationType.INCREMENT;
2746 }
2747 else
2748 {
2749 throw new LDIFException(ERR_READ_MOD_CR_INVALID_MODTYPE.get(modTypeStr,
2750 firstLineNumber),
2751 firstLineNumber, true, ldifLines, null);
2752 }
2753
2754 final String attributeName;
2755 int length = line.length();
2756 if (length == (colonPos+1))
2757 {
2758 // The colon was the last character on the line. This is not
2759 // acceptable.
2760 throw new LDIFException(ERR_READ_MOD_CR_MODTYPE_NO_ATTR.get(
2761 firstLineNumber),
2762 firstLineNumber, true, ldifLines, null);
2763 }
2764 else if (line.charAt(colonPos+1) == ':')
2765 {
2766 // Skip over any spaces leading up to the value, and then the rest of
2767 // the string is the base64-encoded attribute name.
2768 int pos = colonPos+2;
2769 while ((pos < length) && (line.charAt(pos) == ' '))
2770 {
2771 pos++;
2772 }
2773
2774 try
2775 {
2776 final byte[] dnBytes = Base64.decode(line.substring(pos));
2777 attributeName = new String(dnBytes, "UTF-8");
2778 }
2779 catch (final ParseException pe)
2780 {
2781 debugException(pe);
2782 throw new LDIFException(
2783 ERR_READ_MOD_CR_MODTYPE_CANNOT_BASE64_DECODE_ATTR.get(
2784 firstLineNumber, pe.getMessage()),
2785 firstLineNumber, true, ldifLines, pe);
2786 }
2787 catch (final Exception e)
2788 {
2789 debugException(e);
2790 throw new LDIFException(
2791 ERR_READ_MOD_CR_MODTYPE_CANNOT_BASE64_DECODE_ATTR.get(
2792 firstLineNumber, e),
2793 firstLineNumber, true, ldifLines, e);
2794 }
2795 }
2796 else
2797 {
2798 // Skip over any spaces leading up to the value, and then the rest of
2799 // the string is the attribute name.
2800 int pos = colonPos+1;
2801 while ((pos < length) && (line.charAt(pos) == ' '))
2802 {
2803 pos++;
2804 }
2805
2806 attributeName = line.substring(pos);
2807 }
2808
2809 if (attributeName.length() == 0)
2810 {
2811 throw new LDIFException(ERR_READ_MOD_CR_MODTYPE_NO_ATTR.get(
2812 firstLineNumber),
2813 firstLineNumber, true, ldifLines, null);
2814 }
2815
2816
2817 // The next zero or more lines may be the set of attribute values. Keep
2818 // reading until we reach the end of the iterator or until we find a line
2819 // with just a "-".
2820 final ArrayList<ASN1OctetString> valueList =
2821 new ArrayList<ASN1OctetString>(ldifLines.size());
2822 while (iterator.hasNext())
2823 {
2824 line = iterator.next();
2825 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior);
2826 if (line.toString().equals("-"))
2827 {
2828 break;
2829 }
2830
2831 colonPos = line.indexOf(":");
2832 if (colonPos < 0)
2833 {
2834 throw new LDIFException(ERR_READ_NO_ATTR_COLON.get(firstLineNumber),
2835 firstLineNumber, true, ldifLines, null);
2836 }
2837 else if (! line.substring(0, colonPos).equalsIgnoreCase(attributeName))
2838 {
2839 throw new LDIFException(ERR_READ_MOD_CR_ATTR_MISMATCH.get(
2840 firstLineNumber,
2841 line.substring(0, colonPos),
2842 attributeName),
2843 firstLineNumber, true, ldifLines, null);
2844 }
2845
2846 final ASN1OctetString value;
2847 length = line.length();
2848 if (length == (colonPos+1))
2849 {
2850 // The colon was the last character on the line. This is fine.
2851 value = new ASN1OctetString();
2852 }
2853 else if (line.charAt(colonPos+1) == ':')
2854 {
2855 // Skip over any spaces leading up to the value, and then the rest of
2856 // the string is the base64-encoded value. This is unusual and
2857 // unnecessary, but is nevertheless acceptable.
2858 int pos = colonPos+2;
2859 while ((pos < length) && (line.charAt(pos) == ' '))
2860 {
2861 pos++;
2862 }
2863
2864 try
2865 {
2866 value = new ASN1OctetString(Base64.decode(line.substring(pos)));
2867 }
2868 catch (final ParseException pe)
2869 {
2870 debugException(pe);
2871 throw new LDIFException(ERR_READ_CANNOT_BASE64_DECODE_ATTR.get(
2872 attributeName, firstLineNumber, pe.getMessage()),
2873 firstLineNumber, true, ldifLines, pe);
2874 }
2875 catch (final Exception e)
2876 {
2877 debugException(e);
2878 throw new LDIFException(ERR_READ_CANNOT_BASE64_DECODE_ATTR.get(
2879 firstLineNumber, e),
2880 firstLineNumber, true, ldifLines, e);
2881 }
2882 }
2883 else
2884 {
2885 // Skip over any spaces leading up to the value, and then the rest of
2886 // the string is the value.
2887 int pos = colonPos+1;
2888 while ((pos < length) && (line.charAt(pos) == ' '))
2889 {
2890 pos++;
2891 }
2892
2893 value = new ASN1OctetString(line.substring(pos));
2894 }
2895
2896 valueList.add(value);
2897 }
2898
2899 final ASN1OctetString[] values = new ASN1OctetString[valueList.size()];
2900 valueList.toArray(values);
2901
2902 // If it's an add modification type, then there must be at least one
2903 // value.
2904 if ((modType.intValue() == ModificationType.ADD.intValue()) &&
2905 (values.length == 0))
2906 {
2907 throw new LDIFException(ERR_READ_MOD_CR_NO_ADD_VALUES.get(attributeName,
2908 firstLineNumber),
2909 firstLineNumber, true, ldifLines, null);
2910 }
2911
2912 // If it's an increment modification type, then there must be exactly one
2913 // value.
2914 if ((modType.intValue() == ModificationType.INCREMENT.intValue()) &&
2915 (values.length != 1))
2916 {
2917 throw new LDIFException(ERR_READ_MOD_CR_INVALID_INCR_VALUE_COUNT.get(
2918 firstLineNumber, attributeName),
2919 firstLineNumber, true, ldifLines, null);
2920 }
2921
2922 modList.add(new Modification(modType, attributeName, values));
2923 }
2924
2925 final Modification[] mods = new Modification[modList.size()];
2926 modList.toArray(mods);
2927 return mods;
2928 }
2929
2930
2931
2932 /**
2933 * Parses the data available through the provided iterator as the body of a
2934 * modify DN change record (i.e., the newrdn, deleteoldrdn, and optional
2935 * newsuperior lines).
2936 *
2937 * @param ldifLines The lines that comprise the LDIF
2938 * representation of the full record being
2939 * parsed.
2940 * @param iterator The iterator to use to access the modify DN
2941 * data.
2942 * @param dn The current DN of the entry.
2943 * @param trailingSpaceBehavior The behavior that should be exhibited when
2944 * encountering attribute values which are not
2945 * base64-encoded but contain trailing spaces.
2946 * @param firstLineNumber The line number for the start of the record.
2947 *
2948 * @return The decoded modify DN change record.
2949 *
2950 * @throws LDIFException If the provided LDIF data cannot be decoded as a
2951 * modify DN change record.
2952 */
2953 private static LDIFModifyDNChangeRecord parseModifyDNChangeRecord(
2954 final ArrayList<StringBuilder> ldifLines,
2955 final Iterator<StringBuilder> iterator, final String dn,
2956 final TrailingSpaceBehavior trailingSpaceBehavior,
2957 final long firstLineNumber)
2958 throws LDIFException
2959 {
2960 // The next line must be the new RDN, and it must start with "newrdn:".
2961 StringBuilder line = iterator.next();
2962 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior);
2963 int colonPos = line.indexOf(":");
2964 if ((colonPos < 0) ||
2965 (! line.substring(0, colonPos).equalsIgnoreCase("newrdn")))
2966 {
2967 throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWRDN_COLON.get(
2968 firstLineNumber),
2969 firstLineNumber, true, ldifLines, null);
2970 }
2971
2972 final String newRDN;
2973 int length = line.length();
2974 if (length == (colonPos+1))
2975 {
2976 // The colon was the last character on the line. This is not acceptable.
2977 throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWRDN_VALUE.get(
2978 firstLineNumber),
2979 firstLineNumber, true, ldifLines, null);
2980 }
2981 else if (line.charAt(colonPos+1) == ':')
2982 {
2983 // Skip over any spaces leading up to the value, and then the rest of the
2984 // string is the base64-encoded new RDN.
2985 int pos = colonPos+2;
2986 while ((pos < length) && (line.charAt(pos) == ' '))
2987 {
2988 pos++;
2989 }
2990
2991 try
2992 {
2993 final byte[] dnBytes = Base64.decode(line.substring(pos));
2994 newRDN = new String(dnBytes, "UTF-8");
2995 }
2996 catch (final ParseException pe)
2997 {
2998 debugException(pe);
2999 throw new LDIFException(
3000 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWRDN.get(firstLineNumber,
3001 pe.getMessage()),
3002 firstLineNumber, true, ldifLines, pe);
3003 }
3004 catch (final Exception e)
3005 {
3006 debugException(e);
3007 throw new LDIFException(
3008 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWRDN.get(firstLineNumber,
3009 e),
3010 firstLineNumber, true, ldifLines, e);
3011 }
3012 }
3013 else
3014 {
3015 // Skip over any spaces leading up to the value, and then the rest of the
3016 // string is the new RDN.
3017 int pos = colonPos+1;
3018 while ((pos < length) && (line.charAt(pos) == ' '))
3019 {
3020 pos++;
3021 }
3022
3023 newRDN = line.substring(pos);
3024 }
3025
3026 if (newRDN.length() == 0)
3027 {
3028 throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWRDN_VALUE.get(
3029 firstLineNumber),
3030 firstLineNumber, true, ldifLines, null);
3031 }
3032
3033
3034 // The next line must be the deleteOldRDN flag, and it must start with
3035 // 'deleteoldrdn:'.
3036 if (! iterator.hasNext())
3037 {
3038 throw new LDIFException(ERR_READ_MODDN_CR_NO_DELOLDRDN_COLON.get(
3039 firstLineNumber),
3040 firstLineNumber, true, ldifLines, null);
3041 }
3042
3043 line = iterator.next();
3044 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior);
3045 colonPos = line.indexOf(":");
3046 if ((colonPos < 0) ||
3047 (! line.substring(0, colonPos).equalsIgnoreCase("deleteoldrdn")))
3048 {
3049 throw new LDIFException(ERR_READ_MODDN_CR_NO_DELOLDRDN_COLON.get(
3050 firstLineNumber),
3051 firstLineNumber, true, ldifLines, null);
3052 }
3053
3054 final String deleteOldRDNStr;
3055 length = line.length();
3056 if (length == (colonPos+1))
3057 {
3058 // The colon was the last character on the line. This is not acceptable.
3059 throw new LDIFException(ERR_READ_MODDN_CR_NO_DELOLDRDN_VALUE.get(
3060 firstLineNumber),
3061 firstLineNumber, true, ldifLines, null);
3062 }
3063 else if (line.charAt(colonPos+1) == ':')
3064 {
3065 // Skip over any spaces leading up to the value, and then the rest of the
3066 // string is the base64-encoded value. This is unusual and
3067 // unnecessary, but is nevertheless acceptable.
3068 int pos = colonPos+2;
3069 while ((pos < length) && (line.charAt(pos) == ' '))
3070 {
3071 pos++;
3072 }
3073
3074 try
3075 {
3076 final byte[] changeTypeBytes = Base64.decode(line.substring(pos));
3077 deleteOldRDNStr = new String(changeTypeBytes, "UTF-8");
3078 }
3079 catch (final ParseException pe)
3080 {
3081 debugException(pe);
3082 throw new LDIFException(
3083 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_DELOLDRDN.get(
3084 firstLineNumber, pe.getMessage()),
3085 firstLineNumber, true, ldifLines, pe);
3086 }
3087 catch (final Exception e)
3088 {
3089 debugException(e);
3090 throw new LDIFException(
3091 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_DELOLDRDN.get(
3092 firstLineNumber, e),
3093 firstLineNumber, true, ldifLines, e);
3094 }
3095 }
3096 else
3097 {
3098 // Skip over any spaces leading up to the value, and then the rest of the
3099 // string is the value.
3100 int pos = colonPos+1;
3101 while ((pos < length) && (line.charAt(pos) == ' '))
3102 {
3103 pos++;
3104 }
3105
3106 deleteOldRDNStr = line.substring(pos);
3107 }
3108
3109 final boolean deleteOldRDN;
3110 if (deleteOldRDNStr.equals("0"))
3111 {
3112 deleteOldRDN = false;
3113 }
3114 else if (deleteOldRDNStr.equals("1"))
3115 {
3116 deleteOldRDN = true;
3117 }
3118 else if (deleteOldRDNStr.equalsIgnoreCase("false") ||
3119 deleteOldRDNStr.equalsIgnoreCase("no"))
3120 {
3121 // This is technically illegal, but we'll allow it.
3122 deleteOldRDN = false;
3123 }
3124 else if (deleteOldRDNStr.equalsIgnoreCase("true") ||
3125 deleteOldRDNStr.equalsIgnoreCase("yes"))
3126 {
3127 // This is also technically illegal, but we'll allow it.
3128 deleteOldRDN = false;
3129 }
3130 else
3131 {
3132 throw new LDIFException(ERR_READ_MODDN_CR_INVALID_DELOLDRDN.get(
3133 deleteOldRDNStr, firstLineNumber),
3134 firstLineNumber, true, ldifLines, null);
3135 }
3136
3137
3138 // If there is another line, then it must be the new superior DN and it must
3139 // start with "newsuperior:". If this is absent, then it's fine.
3140 final String newSuperiorDN;
3141 if (iterator.hasNext())
3142 {
3143 line = iterator.next();
3144 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior);
3145 colonPos = line.indexOf(":");
3146 if ((colonPos < 0) ||
3147 (! line.substring(0, colonPos).equalsIgnoreCase("newsuperior")))
3148 {
3149 throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWSUPERIOR_COLON.get(
3150 firstLineNumber),
3151 firstLineNumber, true, ldifLines, null);
3152 }
3153
3154 length = line.length();
3155 if (length == (colonPos+1))
3156 {
3157 // The colon was the last character on the line. This is fine.
3158 newSuperiorDN = "";
3159 }
3160 else if (line.charAt(colonPos+1) == ':')
3161 {
3162 // Skip over any spaces leading up to the value, and then the rest of
3163 // the string is the base64-encoded new superior DN.
3164 int pos = colonPos+2;
3165 while ((pos < length) && (line.charAt(pos) == ' '))
3166 {
3167 pos++;
3168 }
3169
3170 try
3171 {
3172 final byte[] dnBytes = Base64.decode(line.substring(pos));
3173 newSuperiorDN = new String(dnBytes, "UTF-8");
3174 }
3175 catch (final ParseException pe)
3176 {
3177 debugException(pe);
3178 throw new LDIFException(
3179 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWSUPERIOR.get(
3180 firstLineNumber, pe.getMessage()),
3181 firstLineNumber, true, ldifLines, pe);
3182 }
3183 catch (final Exception e)
3184 {
3185 debugException(e);
3186 throw new LDIFException(
3187 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWSUPERIOR.get(
3188 firstLineNumber, e),
3189 firstLineNumber, true, ldifLines, e);
3190 }
3191 }
3192 else
3193 {
3194 // Skip over any spaces leading up to the value, and then the rest of
3195 // the string is the new superior DN.
3196 int pos = colonPos+1;
3197 while ((pos < length) && (line.charAt(pos) == ' '))
3198 {
3199 pos++;
3200 }
3201
3202 newSuperiorDN = line.substring(pos);
3203 }
3204 }
3205 else
3206 {
3207 newSuperiorDN = null;
3208 }
3209
3210
3211 // There must not be any more lines.
3212 if (iterator.hasNext())
3213 {
3214 throw new LDIFException(ERR_READ_CR_EXTRA_MODDN_DATA.get(firstLineNumber),
3215 firstLineNumber, true, ldifLines, null);
3216 }
3217
3218 return new LDIFModifyDNChangeRecord(dn, newRDN, deleteOldRDN,
3219 newSuperiorDN);
3220 }
3221
3222
3223
3224 /**
3225 * Examines the line contained in the provided buffer to determine whether it
3226 * may contain one or more illegal trailing spaces. If it does, then those
3227 * spaces will either be stripped out or an exception will be thrown to
3228 * indicate that they are illegal.
3229 *
3230 * @param buffer The buffer to be examined.
3231 * @param dn The DN of the LDIF record being parsed. It
3232 * may be {@code null} if the DN is not yet
3233 * known (e.g., because the provided line is
3234 * expected to contain that DN).
3235 * @param firstLineNumber The approximate line number in the LDIF
3236 * source on which the LDIF record begins.
3237 * @param trailingSpaceBehavior The behavior that should be exhibited when
3238 * encountering attribute values which are not
3239 * base64-encoded but contain trailing spaces.
3240 *
3241 * @throws LDIFException If the line contained in the provided buffer ends
3242 * with one or more illegal trailing spaces and
3243 * {@code stripTrailingSpaces} was provided with a
3244 * value of {@code false}.
3245 */
3246 private static void handleTrailingSpaces(final StringBuilder buffer,
3247 final String dn, final long firstLineNumber,
3248 final TrailingSpaceBehavior trailingSpaceBehavior)
3249 throws LDIFException
3250 {
3251 int pos = buffer.length() - 1;
3252 boolean trailingFound = false;
3253 while ((pos >= 0) && (buffer.charAt(pos) == ' '))
3254 {
3255 trailingFound = true;
3256 pos--;
3257 }
3258
3259 if (trailingFound && (buffer.charAt(pos) != ':'))
3260 {
3261 switch (trailingSpaceBehavior)
3262 {
3263 case STRIP:
3264 buffer.setLength(pos+1);
3265 break;
3266
3267 case REJECT:
3268 if (dn == null)
3269 {
3270 throw new LDIFException(
3271 ERR_READ_ILLEGAL_TRAILING_SPACE_WITHOUT_DN.get(firstLineNumber,
3272 buffer.toString()),
3273 firstLineNumber, true);
3274 }
3275 else
3276 {
3277 throw new LDIFException(
3278 ERR_READ_ILLEGAL_TRAILING_SPACE_WITH_DN.get(dn,
3279 firstLineNumber, buffer.toString()),
3280 firstLineNumber, true);
3281 }
3282
3283 case RETAIN:
3284 default:
3285 // No action will be taken.
3286 break;
3287 }
3288 }
3289 }
3290
3291
3292
3293 /**
3294 * This represents an unparsed LDIFRecord. It stores the line number of the
3295 * first line of the record and each line of the record.
3296 */
3297 private static final class UnparsedLDIFRecord
3298 {
3299 private final ArrayList<StringBuilder> lineList;
3300 private final long firstLineNumber;
3301 private final Exception failureCause;
3302 private final boolean isEOF;
3303 private final DuplicateValueBehavior duplicateValueBehavior;
3304 private final Schema schema;
3305 private final TrailingSpaceBehavior trailingSpaceBehavior;
3306
3307
3308
3309 /**
3310 * Constructor.
3311 *
3312 * @param lineList The lines that comprise the LDIF record.
3313 * @param duplicateValueBehavior The behavior to exhibit if the entry
3314 * contains duplicate attribute values.
3315 * @param trailingSpaceBehavior Specifies the behavior to exhibit when
3316 * encountering trailing spaces in
3317 * non-base64-encoded attribute values.
3318 * @param schema The schema to use when parsing, if
3319 * applicable.
3320 * @param firstLineNumber The first line number of the LDIF record.
3321 */
3322 private UnparsedLDIFRecord(final ArrayList<StringBuilder> lineList,
3323 final DuplicateValueBehavior duplicateValueBehavior,
3324 final TrailingSpaceBehavior trailingSpaceBehavior,
3325 final Schema schema, final long firstLineNumber)
3326 {
3327 this.lineList = lineList;
3328 this.firstLineNumber = firstLineNumber;
3329 this.duplicateValueBehavior = duplicateValueBehavior;
3330 this.trailingSpaceBehavior = trailingSpaceBehavior;
3331 this.schema = schema;
3332
3333 failureCause = null;
3334 isEOF =
3335 (firstLineNumber < 0) || ((lineList != null) && lineList.isEmpty());
3336 }
3337
3338
3339
3340 /**
3341 * Constructor.
3342 *
3343 * @param failureCause The Exception thrown when reading from the input.
3344 */
3345 private UnparsedLDIFRecord(final Exception failureCause)
3346 {
3347 this.failureCause = failureCause;
3348
3349 lineList = null;
3350 firstLineNumber = 0;
3351 duplicateValueBehavior = DuplicateValueBehavior.REJECT;
3352 trailingSpaceBehavior = TrailingSpaceBehavior.REJECT;
3353 schema = null;
3354 isEOF = false;
3355 }
3356
3357
3358
3359 /**
3360 * Return the lines that comprise the LDIF record.
3361 *
3362 * @return The lines that comprise the LDIF record.
3363 */
3364 private ArrayList<StringBuilder> getLineList()
3365 {
3366 return lineList;
3367 }
3368
3369
3370
3371 /**
3372 * Retrieves the behavior to exhibit when encountering duplicate attribute
3373 * values.
3374 *
3375 * @return The behavior to exhibit when encountering duplicate attribute
3376 * values.
3377 */
3378 private DuplicateValueBehavior getDuplicateValueBehavior()
3379 {
3380 return duplicateValueBehavior;
3381 }
3382
3383
3384
3385 /**
3386 * Retrieves the behavior that should be exhibited when encountering
3387 * attribute values which are not base64-encoded but contain trailing
3388 * spaces. The LDIF specification strongly recommends that any value which
3389 * legitimately contains trailing spaces be base64-encoded, but the LDAP SDK
3390 * LDIF parser may be configured to automatically strip these spaces, to
3391 * preserve them, or to reject any entry or change record containing them.
3392 *
3393 * @return The behavior that should be exhibited when encountering
3394 * attribute values which are not base64-encoded but contain
3395 * trailing spaces.
3396 */
3397 private TrailingSpaceBehavior getTrailingSpaceBehavior()
3398 {
3399 return trailingSpaceBehavior;
3400 }
3401
3402
3403
3404 /**
3405 * Retrieves the schema that should be used when parsing the record, if
3406 * applicable.
3407 *
3408 * @return The schema that should be used when parsing the record, or
3409 * {@code null} if none should be used.
3410 */
3411 private Schema getSchema()
3412 {
3413 return schema;
3414 }
3415
3416
3417
3418 /**
3419 * Return the first line number of the LDIF record.
3420 *
3421 * @return The first line number of the LDIF record.
3422 */
3423 private long getFirstLineNumber()
3424 {
3425 return firstLineNumber;
3426 }
3427
3428
3429
3430 /**
3431 * Return {@code true} iff the end of the input was reached.
3432 *
3433 * @return {@code true} iff the end of the input was reached.
3434 */
3435 private boolean isEOF()
3436 {
3437 return isEOF;
3438 }
3439
3440
3441
3442 /**
3443 * Returns the reason that reading the record lines failed. This normally
3444 * is only non-null if something bad happened to the input stream (like
3445 * a disk read error).
3446 *
3447 * @return The reason that reading the record lines failed.
3448 */
3449 private Exception getFailureCause()
3450 {
3451 return failureCause;
3452 }
3453 }
3454
3455
3456 /**
3457 * When processing in asynchronous mode, this thread is responsible for
3458 * reading the raw unparsed records from the input and submitting them for
3459 * processing.
3460 */
3461 private final class LineReaderThread
3462 extends Thread
3463 {
3464 /**
3465 * Constructor.
3466 */
3467 private LineReaderThread()
3468 {
3469 super("Asynchronous LDIF line reader");
3470 setDaemon(true);
3471 }
3472
3473
3474
3475 /**
3476 * Reads raw, unparsed records from the input and submits them for
3477 * processing until the input is finished or closed.
3478 */
3479 @Override()
3480 public void run()
3481 {
3482 try
3483 {
3484 boolean stopProcessing = false;
3485 while (!stopProcessing)
3486 {
3487 UnparsedLDIFRecord unparsedRecord = null;
3488 try
3489 {
3490 unparsedRecord = readUnparsedRecord();
3491 }
3492 catch (IOException e)
3493 {
3494 debugException(e);
3495 unparsedRecord = new UnparsedLDIFRecord(e);
3496 stopProcessing = true;
3497 }
3498 catch (Exception e)
3499 {
3500 debugException(e);
3501 unparsedRecord = new UnparsedLDIFRecord(e);
3502 }
3503
3504 try
3505 {
3506 asyncParser.submit(unparsedRecord);
3507 }
3508 catch (InterruptedException e)
3509 {
3510 debugException(e);
3511 // If this thread is interrupted, then someone wants us to stop
3512 // processing, so that's what we'll do.
3513 stopProcessing = true;
3514 }
3515
3516 if ((unparsedRecord == null) || (unparsedRecord.isEOF()))
3517 {
3518 stopProcessing = true;
3519 }
3520 }
3521 }
3522 finally
3523 {
3524 try
3525 {
3526 asyncParser.shutdown();
3527 }
3528 catch (InterruptedException e)
3529 {
3530 debugException(e);
3531 }
3532 finally
3533 {
3534 asyncParsingComplete.set(true);
3535 }
3536 }
3537 }
3538 }
3539
3540
3541
3542 /**
3543 * Used to parse Records asynchronously.
3544 */
3545 private final class RecordParser implements Processor<UnparsedLDIFRecord,
3546 LDIFRecord>
3547 {
3548 /**
3549 * {@inheritDoc}
3550 */
3551 public LDIFRecord process(final UnparsedLDIFRecord input)
3552 throws LDIFException
3553 {
3554 LDIFRecord record = decodeRecord(input, relativeBasePath);
3555
3556 if ((record instanceof Entry) && (entryTranslator != null))
3557 {
3558 record = entryTranslator.translate((Entry) record,
3559 input.getFirstLineNumber());
3560
3561 if (record == null)
3562 {
3563 record = SKIP_ENTRY;
3564 }
3565 }
3566 return record;
3567 }
3568 }
3569 }