001    /*
002     * Copyright 2007-2013 UnboundID Corp.
003     * All Rights Reserved.
004     */
005    /*
006     * Copyright (C) 2008-2013 UnboundID Corp.
007     *
008     * This program is free software; you can redistribute it and/or modify
009     * it under the terms of the GNU General Public License (GPLv2 only)
010     * or the terms of the GNU Lesser General Public License (LGPLv2.1 only)
011     * as published by the Free Software Foundation.
012     *
013     * This program is distributed in the hope that it will be useful,
014     * but WITHOUT ANY WARRANTY; without even the implied warranty of
015     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016     * GNU General Public License for more details.
017     *
018     * You should have received a copy of the GNU General Public License
019     * along with this program; if not, see <http://www.gnu.org/licenses>.
020     */
021    package com.unboundid.ldif;
022    
023    
024    
025    import java.io.BufferedReader;
026    import java.io.BufferedWriter;
027    import java.io.File;
028    import java.io.FileInputStream;
029    import java.io.FileWriter;
030    import java.io.InputStream;
031    import java.io.InputStreamReader;
032    import java.io.IOException;
033    import java.text.ParseException;
034    import java.util.ArrayList;
035    import java.util.Collection;
036    import java.util.Iterator;
037    import java.util.LinkedHashMap;
038    import java.util.List;
039    import java.util.concurrent.BlockingQueue;
040    import java.util.concurrent.ArrayBlockingQueue;
041    import java.util.concurrent.TimeUnit;
042    import java.util.concurrent.atomic.AtomicBoolean;
043    import java.nio.charset.Charset;
044    
045    import com.unboundid.asn1.ASN1OctetString;
046    import com.unboundid.ldap.matchingrules.CaseIgnoreStringMatchingRule;
047    import com.unboundid.ldap.matchingrules.MatchingRule;
048    import com.unboundid.ldap.sdk.Attribute;
049    import com.unboundid.ldap.sdk.Entry;
050    import com.unboundid.ldap.sdk.Modification;
051    import com.unboundid.ldap.sdk.ModificationType;
052    import com.unboundid.ldap.sdk.LDAPException;
053    import com.unboundid.ldap.sdk.schema.Schema;
054    import com.unboundid.util.AggregateInputStream;
055    import com.unboundid.util.Base64;
056    import com.unboundid.util.LDAPSDKThreadFactory;
057    import com.unboundid.util.ThreadSafety;
058    import com.unboundid.util.ThreadSafetyLevel;
059    import com.unboundid.util.parallel.AsynchronousParallelProcessor;
060    import com.unboundid.util.parallel.Result;
061    import com.unboundid.util.parallel.ParallelProcessor;
062    import com.unboundid.util.parallel.Processor;
063    
064    import static com.unboundid.ldif.LDIFMessages.*;
065    import static com.unboundid.util.Debug.*;
066    import static com.unboundid.util.StaticUtils.*;
067    import static com.unboundid.util.Validator.*;
068    
069    /**
070     * This class provides an LDIF reader, which can be used to read and decode
071     * entries and change records from a data source using the LDAP Data Interchange
072     * Format as per <A HREF="http://www.ietf.org/rfc/rfc2849.txt">RFC 2849</A>.
073     * <BR>
074     * This class is not synchronized.  If multiple threads read from the
075     * LDIFReader, they must be synchronized externally.
076     * <BR><BR>
077     * <H2>Example</H2>
078     * The following example iterates through all entries contained in an LDIF file
079     * and attempts to add them to a directory server:
080     * <PRE>
081     *   LDIFReader ldifReader = new LDIFReader(pathToLDIFFile);
082     *
083     *   while (true)
084     *   {
085     *     Entry entry;
086     *     try
087     *     {
088     *       entry = ldifReader.readEntry();
089     *       if (entry == null)
090     *       {
091     *         System.err.println("All entries have been processed.");
092     *         break;
093     *       }
094     *     }
095     *     catch (LDIFException le)
096     *     {
097     *       if (le.mayContinueReading())
098     *       {
099     *         System.err.println("A recoverable error occurred while " +
100     *              "attempting to read an entry at or near line number " +
101     *              le.getLineNumber() + ":  " + le.getMessage());
102     *         System.err.println("The entry will be skipped.");
103     *         continue;
104     *       }
105     *       else
106     *       {
107     *         System.err.println("An unrecoverable error occurred while " +
108     *              "attempting to read an entry at or near line number " +
109     *              le.getLineNumber() + ":  " + le.getMessage());
110     *         System.err.println("LDIF processing will be aborted.");
111     *         break;
112     *       }
113     *     }
114     *     catch (IOException ioe)
115     *     {
116     *       System.err.println("An I/O error occurred while attempting to read " +
117     *            "from the LDIF file:  " + ioe.getMessage());
118     *       System.err.println("LDIF processing will be aborted.");
119     *       break;
120     *     }
121     *
122     *     try
123     *     {
124     *       connection.add(entry);
125     *       System.out.println("Successfully added entry " + entry.getDN());
126     *     }
127     *     catch (LDAPException le)
128     *     {
129     *       System.err.println("Unable to add entry " + entry.getDN() + " -- " +
130     *            le.getMessage());
131     *     }
132     *   }
133     *
134     *   ldifReader.close();
135     * </PRE>
136     */
137    @ThreadSafety(level=ThreadSafetyLevel.NOT_THREADSAFE)
138    public final class LDIFReader
139    {
  /**
   * The default buffer size (128KB) that will be used when reading from the
   * data source.  This is the buffer size given to the {@code BufferedReader}
   * that the input-stream-based constructors wrap around the stream.
   */
  public static final int DEFAULT_BUFFER_SIZE = 128 * 1024;



  /**
   * When processing asynchronously, this determines how many of the allocated
   * worker threads are used to parse each batch of read entries.  It is passed
   * as the last argument when constructing the {@code ParallelProcessor} that
   * parses records.
   * <BR>
   * NOTE(review): the constant's name suggests this is really the minimum
   * number of records handed to each parsing thread rather than a thread
   * count -- confirm against {@code ParallelProcessor}.
   */
  private static final int ASYNC_MIN_PER_PARSING_THREAD = 3;



  /**
   * When processing asynchronously, this specifies the capacity of the queue
   * of pending (unparsed) records.  The queue of completed (parsed) records is
   * sized from this value as well, at {@code (2 * ASYNC_QUEUE_SIZE) + 100}.
   */
  private static final int ASYNC_QUEUE_SIZE = 500;



  /**
   * Special entry used internally to indicate that the
   * LDIFReaderEntryTranslator has requested that a read Entry be skipped by
   * returning {@code null}, which would normally imply EOF.
   */
  private static final Entry SKIP_ENTRY = new Entry("cn=skipped");
170    
171    
172    
173      /**
174       * The default base path that will be prepended to relative paths.  It will
175       * end with a trailing slash.
176       */
177      private static final String DEFAULT_RELATIVE_BASE_PATH;
178      static
179      {
180        final File currentDir;
181        String currentDirString = System.getProperty("user.dir");
182        if (currentDirString == null)
183        {
184          currentDir = new File(".");
185        }
186        else
187        {
188          currentDir = new File(currentDirString);
189        }
190    
191        final String currentDirAbsolutePath = currentDir.getAbsolutePath();
192        if (currentDirAbsolutePath.endsWith(File.separator))
193        {
194          DEFAULT_RELATIVE_BASE_PATH = currentDirAbsolutePath;
195        }
196        else
197        {
198          DEFAULT_RELATIVE_BASE_PATH = currentDirAbsolutePath + File.separator;
199        }
200      }
201    
202    
203    
  // The buffered reader that will be used to read LDIF data.  It is set once
  // by the constructor and closed by close().
  private final BufferedReader reader;

  // The behavior that should be exhibited when encountering duplicate attribute
  // values.  The constructor initializes it to STRIP.
  private volatile DuplicateValueBehavior duplicateValueBehavior;

  // A line number counter.  It starts at zero.
  private long lineNumberCounter = 0;

  // The translator to apply to entries before they are returned.  It may be
  // null, in which case entries are returned unaltered.
  private final LDIFReaderEntryTranslator entryTranslator;

  // The schema that will be used when processing, if applicable.  It is not
  // assigned by the constructor, so it starts out null.
  private Schema schema;

  // Specifies the base path that will be prepended to relative paths for file
  // URLs.  The constructor initializes it to DEFAULT_RELATIVE_BASE_PATH.
  private volatile String relativeBasePath;

  // The behavior that should be exhibited with regard to illegal trailing
  // spaces in attribute values.  The constructor initializes it to REJECT.
  private volatile TrailingSpaceBehavior trailingSpaceBehavior;

  // True iff we are processing asynchronously (i.e., the reader was created
  // with a nonzero number of parse threads).
  private final boolean isAsync;

  //
  // The following only apply to asynchronous processing.  All three are null
  // when isAsync is false.
  //

  // Parses entries asynchronously.
  private final AsynchronousParallelProcessor<UnparsedLDIFRecord, LDIFRecord>
       asyncParser;

  // Set to true when the end of the input is reached.
  private final AtomicBoolean asyncParsingComplete;

  // The records that have been read and parsed.  This is the output queue of
  // asyncParser.
  private final BlockingQueue<Result<UnparsedLDIFRecord, LDIFRecord>>
       asyncParsedRecords;
244    
245    
246    
247      /**
248       * Creates a new LDIF reader that will read data from the specified file.
249       *
250       * @param  path  The path to the file from which the data is to be read.  It
251       *               must not be {@code null}.
252       *
253       * @throws  IOException  If a problem occurs while opening the file for
254       *                       reading.
255       */
256      public LDIFReader(final String path)
257             throws IOException
258      {
259        this(new FileInputStream(path));
260      }
261    
262    
263    
264      /**
265       * Creates a new LDIF reader that will read data from the specified file
266       * and parses the LDIF records asynchronously using the specified number of
267       * threads.
268       *
269       * @param  path  The path to the file from which the data is to be read.  It
270       *               must not be {@code null}.
271       * @param  numParseThreads  If this value is greater than zero, then the
272       *                          specified number of threads will be used to
273       *                          asynchronously read and parse the LDIF file.
274       *
275       * @throws  IOException  If a problem occurs while opening the file for
276       *                       reading.
277       *
278       * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
279       *      constructor for more details about asynchronous processing.
280       */
281      public LDIFReader(final String path, final int numParseThreads)
282             throws IOException
283      {
284        this(new FileInputStream(path), numParseThreads);
285      }
286    
287    
288    
289      /**
290       * Creates a new LDIF reader that will read data from the specified file.
291       *
292       * @param  file  The file from which the data is to be read.  It must not be
293       *               {@code null}.
294       *
295       * @throws  IOException  If a problem occurs while opening the file for
296       *                       reading.
297       */
298      public LDIFReader(final File file)
299             throws IOException
300      {
301        this(new FileInputStream(file));
302      }
303    
304    
305    
306      /**
307       * Creates a new LDIF reader that will read data from the specified file
308       * and optionally parses the LDIF records asynchronously using the specified
309       * number of threads.
310       *
311       * @param  file             The file from which the data is to be read.  It
312       *                          must not be {@code null}.
313       * @param  numParseThreads  If this value is greater than zero, then the
314       *                          specified number of threads will be used to
315       *                          asynchronously read and parse the LDIF file.
316       *
317       * @throws  IOException  If a problem occurs while opening the file for
318       *                       reading.
319       */
320      public LDIFReader(final File file, final int numParseThreads)
321             throws IOException
322      {
323        this(new FileInputStream(file), numParseThreads);
324      }
325    
326    
327    
  /**
   * Creates a new LDIF reader that will read data from the specified files in
   * the order in which they are provided and optionally parses the LDIF records
   * asynchronously using the specified number of threads.  When more than one
   * file is given, the files are read through a single aggregate stream in
   * which a spacer containing blank lines is placed between consecutive files.
   *
   * @param  files            The files from which the data is to be read.  It
   *                          must not be {@code null} or empty.  An empty array
   *                          results in an {@code IOException}.
   * @param  numParseThreads  If this value is greater than zero, then the
   *                          specified number of threads will be used to
   *                          asynchronously read and parse the LDIF file.  A
   *                          value of zero causes records to be parsed
   *                          synchronously.
   * @param entryTranslator   The LDIFReaderEntryTranslator to apply to entries
   *                          before they are returned.  This is normally
   *                          {@code null}, which causes entries to be returned
   *                          unaltered. This is particularly useful when
   *                          parsing the input file in parallel because the
   *                          entry translation is also done in parallel.
   *
   * @throws  IOException  If no files were provided, or if a problem occurs
   *                       while opening the files for reading.
   */
  public LDIFReader(final File[] files, final int numParseThreads,
                    final LDIFReaderEntryTranslator entryTranslator)
         throws IOException
  {
    // The helper validates the array and stitches the files into one stream.
    this(createAggregateInputStream(files), numParseThreads, entryTranslator);
  }
354    
355    
356    
357      /**
358       * Creates a new aggregate input stream that will read data from the specified
359       * files.  If there are multiple files, then a "padding" file will be inserted
360       * between them to ensure that there is at least one blank line between the
361       * end of one file and the beginning of another.
362       *
363       * @param  files  The files from which the data is to be read.  It must not be
364       *                {@code null} or empty.
365       *
366       * @return  The input stream to use to read data from the provided files.
367       *
368       * @throws  IOException  If a problem is encountered while attempting to
369       *                       create the input stream.
370       */
371      private static InputStream createAggregateInputStream(final File... files)
372              throws IOException
373      {
374        if (files.length == 0)
375        {
376          throw new IOException(ERR_READ_NO_LDIF_FILES.get());
377        }
378        else if (files.length == 1)
379        {
380          return new FileInputStream(files[0]);
381        }
382        else
383        {
384          final File spacerFile =
385               File.createTempFile("ldif-reader-spacer", ".ldif");
386          spacerFile.deleteOnExit();
387    
388          final BufferedWriter spacerWriter =
389               new BufferedWriter(new FileWriter(spacerFile));
390          try
391          {
392            spacerWriter.newLine();
393            spacerWriter.newLine();
394          }
395          finally
396          {
397            spacerWriter.close();
398          }
399    
400          final File[] returnArray = new File[(files.length * 2) - 1];
401          returnArray[0] = files[0];
402    
403          int pos = 1;
404          for (int i=1; i < files.length; i++)
405          {
406            returnArray[pos++] = spacerFile;
407            returnArray[pos++] = files[i];
408          }
409    
410          return new AggregateInputStream(returnArray);
411        }
412      }
413    
414    
415    
416      /**
417       * Creates a new LDIF reader that will read data from the provided input
418       * stream.
419       *
420       * @param  inputStream  The input stream from which the data is to be read.
421       *                      It must not be {@code null}.
422       */
423      public LDIFReader(final InputStream inputStream)
424      {
425        this(inputStream, 0);
426      }
427    
428    
429    
430      /**
431       * Creates a new LDIF reader that will read data from the specified stream
432       * and parses the LDIF records asynchronously using the specified number of
433       * threads.
434       *
435       * @param  inputStream  The input stream from which the data is to be read.
436       *                      It must not be {@code null}.
437       * @param  numParseThreads  If this value is greater than zero, then the
438       *                          specified number of threads will be used to
439       *                          asynchronously read and parse the LDIF file.
440       *
441       * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
442       *      constructor for more details about asynchronous processing.
443       */
444      public LDIFReader(final InputStream inputStream, final int numParseThreads)
445      {
446        // UTF-8 is required by RFC 2849.  Java guarantees it's always available.
447        this(new BufferedReader(new InputStreamReader(inputStream,
448                                                      Charset.forName("UTF-8")),
449                                DEFAULT_BUFFER_SIZE),
450             numParseThreads);
451      }
452    
453    
454    
  /**
   * Creates a new LDIF reader that will read data from the specified stream
   * and optionally parses the LDIF records asynchronously using the specified
   * number of threads.  The stream is wrapped in a {@code BufferedReader} that
   * decodes it as UTF-8 and uses a buffer of {@link #DEFAULT_BUFFER_SIZE}
   * characters.
   *
   * @param  inputStream  The input stream from which the data is to be read.
   *                      It must not be {@code null}.
   * @param  numParseThreads  If this value is greater than zero, then the
   *                          specified number of threads will be used to
   *                          asynchronously read and parse the LDIF file.  A
   *                          value of zero causes records to be parsed
   *                          synchronously.
   * @param entryTranslator  The LDIFReaderEntryTranslator to apply to read
   *                         entries before they are returned.  This is normally
   *                         {@code null}, which causes entries to be returned
   *                         unaltered. This is particularly useful when parsing
   *                         the input file in parallel because the entry
   *                         translation is also done in parallel.
   *
   * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
   *      constructor for more details about asynchronous processing.
   */
  public LDIFReader(final InputStream inputStream, final int numParseThreads,
                    final LDIFReaderEntryTranslator entryTranslator)
  {
    // UTF-8 is required by RFC 2849.  Java guarantees it's always available,
    // so the charset lookup cannot fail.
    this(new BufferedReader(new InputStreamReader(inputStream,
                                                  Charset.forName("UTF-8")),
                            DEFAULT_BUFFER_SIZE),
         numParseThreads, entryTranslator);
  }
484    
485    
486    
487      /**
488       * Creates a new LDIF reader that will use the provided buffered reader to
489       * read the LDIF data.  The encoding of the underlying Reader must be set to
490       * "UTF-8" as required by RFC 2849.
491       *
492       * @param  reader  The buffered reader that will be used to read the LDIF
493       *                 data.  It must not be {@code null}.
494       */
495      public LDIFReader(final BufferedReader reader)
496      {
497        this(reader, 0);
498      }
499    
500    
501    
  /**
   * Creates a new LDIF reader that will read data from the specified buffered
   * reader and optionally parses the LDIF records asynchronously using the
   * specified number of threads.  No entry translator is used.  The encoding
   * of the underlying Reader must be set to "UTF-8" as required by RFC 2849.
   *
   * @param reader The buffered reader that will be used to read the LDIF data.
   *               It must not be {@code null}.
   * @param  numParseThreads  If this value is greater than zero, then the
   *                          specified number of threads will be used to
   *                          asynchronously read and parse the LDIF file.  A
   *                          value of zero causes records to be parsed
   *                          synchronously.
   *
   * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
   *      constructor for more details about asynchronous processing.
   */
  public LDIFReader(final BufferedReader reader, final int numParseThreads)
  {
    // A null translator means entries are returned unaltered.
    this(reader, numParseThreads, null);
  }
521    
522    
523    
524      /**
525       * Creates a new LDIF reader that will read data from the specified buffered
526       * reader and parses the LDIF records asynchronously using the specified
527       * number of threads.  The encoding of the underlying Reader must be set to
528       * "UTF-8" as required by RFC 2849.
529       *
530       * @param reader The buffered reader that will be used to read the LDIF data.
531       *               It must not be {@code null}.
532       * @param  numParseThreads  If this value is greater than zero, then the
533       *                          specified number of threads will be used to
534       *                          asynchronously read and parse the LDIF file.
535       *                          This should only be set to greater than zero when
536       *                          performance analysis has demonstrated that reading
537       *                          and parsing the LDIF is a bottleneck.  The default
538       *                          synchronous processing is normally fast enough.
539       *                          There is little benefit in passing in a value
540       *                          greater than four (unless there is an
541       *                          LDIFReaderEntryTranslator that does time-consuming
542       *                          processing).  A value of zero implies the
543       *                          default behavior of reading and parsing LDIF
544       *                          records synchronously when one of the read
545       *                          methods is called.
546       * @param entryTranslator  The LDIFReaderEntryTranslator to apply to read
547       *                         entries before they are returned.  This is normally
548       *                         {@code null}, which causes entries to be returned
549       *                         unaltered. This is particularly useful when parsing
550       *                         the input file in parallel because the entry
551       *                         translation is also done in parallel.
552       */
553      public LDIFReader(final BufferedReader reader,
554                        final int numParseThreads,
555                        final LDIFReaderEntryTranslator entryTranslator)
556      {
557        ensureNotNull(reader);
558        ensureTrue(numParseThreads >= 0,
559                   "LDIFReader.numParseThreads must not be negative.");
560    
561        this.reader = reader;
562        this.entryTranslator = entryTranslator;
563    
564        duplicateValueBehavior = DuplicateValueBehavior.STRIP;
565        trailingSpaceBehavior  = TrailingSpaceBehavior.REJECT;
566    
567        relativeBasePath = DEFAULT_RELATIVE_BASE_PATH;
568    
569        if (numParseThreads == 0)
570        {
571          isAsync = false;
572          asyncParser = null;
573          asyncParsingComplete = null;
574          asyncParsedRecords = null;
575        }
576        else
577        {
578          isAsync = true;
579          asyncParsingComplete = new AtomicBoolean(false);
580    
581          // Decodes entries in parallel.
582          final LDAPSDKThreadFactory threadFactory =
583               new LDAPSDKThreadFactory("LDIFReader Worker", true, null);
584          final ParallelProcessor<UnparsedLDIFRecord, LDIFRecord> parallelParser =
585               new ParallelProcessor<UnparsedLDIFRecord, LDIFRecord>(
586                    new RecordParser(), threadFactory, numParseThreads,
587                    ASYNC_MIN_PER_PARSING_THREAD);
588    
589          final BlockingQueue<UnparsedLDIFRecord> pendingQueue = new
590               ArrayBlockingQueue<UnparsedLDIFRecord>(ASYNC_QUEUE_SIZE);
591    
592          // The output queue must be a little more than twice as big as the input
593          // queue to more easily handle being shutdown in the middle of processing
594          // when the queues are full and threads are blocked.
595          asyncParsedRecords = new ArrayBlockingQueue
596               <Result<UnparsedLDIFRecord, LDIFRecord>>(2 * ASYNC_QUEUE_SIZE + 100);
597    
598          asyncParser = new AsynchronousParallelProcessor
599               <UnparsedLDIFRecord, LDIFRecord>(pendingQueue, parallelParser,
600                                                asyncParsedRecords);
601    
602          final LineReaderThread lineReaderThread = new LineReaderThread();
603          lineReaderThread.start();
604        }
605      }
606    
607    
608    
609      /**
610       * Reads entries from the LDIF file with the specified path and returns them
611       * as a {@code List}.  This is a convenience method that should only be used
612       * for data sets that are small enough so that running out of memory isn't a
613       * concern.
614       *
615       * @param  path  The path to the LDIF file containing the entries to be read.
616       *
617       * @return  A list of the entries read from the given LDIF file.
618       *
619       * @throws  IOException  If a problem occurs while attempting to read data
620       *                       from the specified file.
621       *
622       * @throws  LDIFException  If a problem is encountered while attempting to
623       *                         decode data read as LDIF.
624       */
625      public static List<Entry> readEntries(final String path)
626             throws IOException, LDIFException
627      {
628        return readEntries(new LDIFReader(path));
629      }
630    
631    
632    
633      /**
634       * Reads entries from the specified LDIF file and returns them as a
635       * {@code List}.  This is a convenience method that should only be used for
636       * data sets that are small enough so that running out of memory isn't a
637       * concern.
638       *
639       * @param  file  A reference to the LDIF file containing the entries to be
640       *               read.
641       *
642       * @return  A list of the entries read from the given LDIF file.
643       *
644       * @throws  IOException  If a problem occurs while attempting to read data
645       *                       from the specified file.
646       *
647       * @throws  LDIFException  If a problem is encountered while attempting to
648       *                         decode data read as LDIF.
649       */
650      public static List<Entry> readEntries(final File file)
651             throws IOException, LDIFException
652      {
653        return readEntries(new LDIFReader(file));
654      }
655    
656    
657    
658      /**
659       * Reads and decodes LDIF entries from the provided input stream and
660       * returns them as a {@code List}.  This is a convenience method that should
661       * only be used for data sets that are small enough so that running out of
662       * memory isn't a concern.
663       *
664       * @param  inputStream  The input stream from which the entries should be
665       *                      read.  The input stream will be closed before
666       *                      returning.
667       *
668       * @return  A list of the entries read from the given input stream.
669       *
670       * @throws  IOException  If a problem occurs while attempting to read data
671       *                       from the input stream.
672       *
673       * @throws  LDIFException  If a problem is encountered while attempting to
674       *                         decode data read as LDIF.
675       */
676      public static List<Entry> readEntries(final InputStream inputStream)
677             throws IOException, LDIFException
678      {
679        return readEntries(new LDIFReader(inputStream));
680      }
681    
682    
683    
684      /**
685       * Reads entries from the provided LDIF reader and returns them as a list.
686       *
687       * @param  reader  The reader from which the entries should be read.  It will
688       *                 be closed before returning.
689       *
690       * @return  A list of the entries read from the provided reader.
691       *
692       * @throws  IOException  If a problem was encountered while attempting to read
693       *                       data from the LDIF data source.
694       *
695       * @throws  LDIFException  If a problem is encountered while attempting to
696       *                         decode data read as LDIF.
697       */
698      private static List<Entry> readEntries(final LDIFReader reader)
699              throws IOException, LDIFException
700      {
701        try
702        {
703          final ArrayList<Entry> entries = new ArrayList<Entry>(10);
704          while (true)
705          {
706            final Entry e = reader.readEntry();
707            if (e == null)
708            {
709              break;
710            }
711    
712            entries.add(e);
713          }
714    
715          return entries;
716        }
717        finally
718        {
719          reader.close();
720        }
721      }
722    
723    
724    
725      /**
726       * Closes this LDIF reader and the underlying LDIF source.
727       *
728       * @throws  IOException  If a problem occurs while closing the underlying LDIF
729       *                       source.
730       */
731      public void close()
732             throws IOException
733      {
734        reader.close();
735    
736        if (isAsync())
737        {
738          // Closing the reader will trigger the LineReaderThread to complete, but
739          // not if it's blocked submitting the next UnparsedLDIFRecord.  To avoid
740          // this, we clear out the completed output queue, which is larger than
741          // the input queue, so the LineReaderThread will stop reading and
742          // shutdown the asyncParser.
743          asyncParsedRecords.clear();
744        }
745      }
746    
747    
748    
749      /**
750       * Indicates whether to ignore any duplicate values encountered while reading
751       * LDIF records.
752       *
753       * @return  {@code true} if duplicate values should be ignored, or
754       *          {@code false} if any LDIF records containing duplicate values
755       *          should be rejected.
756       *
757       * @deprecated  Use the {@link #getDuplicateValueBehavior} method instead.
758       */
759      @Deprecated()
760      public boolean ignoreDuplicateValues()
761      {
762        return (duplicateValueBehavior == DuplicateValueBehavior.STRIP);
763      }
764    
765    
766    
767      /**
768       * Specifies whether to ignore any duplicate values encountered while reading
769       * LDIF records.
770       *
771       * @param  ignoreDuplicateValues  Indicates whether to ignore duplicate
772       *                                attribute values encountered while reading
773       *                                LDIF records.
774       *
775       * @deprecated  Use the {@link #setDuplicateValueBehavior} method instead.
776       */
777      @Deprecated()
778      public void setIgnoreDuplicateValues(final boolean ignoreDuplicateValues)
779      {
780        if (ignoreDuplicateValues)
781        {
782          duplicateValueBehavior = DuplicateValueBehavior.STRIP;
783        }
784        else
785        {
786          duplicateValueBehavior = DuplicateValueBehavior.REJECT;
787        }
788      }
789    
790    
791    
792      /**
793       * Retrieves the behavior that should be exhibited if the LDIF reader
794       * encounters an entry with duplicate values.
795       *
796       * @return  The behavior that should be exhibited if the LDIF reader
797       *          encounters an entry with duplicate values.
798       */
799      public DuplicateValueBehavior getDuplicateValueBehavior()
800      {
801        return duplicateValueBehavior;
802      }
803    
804    
805    
806      /**
807       * Specifies the behavior that should be exhibited if the LDIF reader
808       * encounters an entry with duplicate values.
809       *
810       * @param  duplicateValueBehavior  The behavior that should be exhibited if
811       *                                 the LDIF reader encounters an entry with
812       *                                 duplicate values.
813       */
814      public void setDuplicateValueBehavior(
815                       final DuplicateValueBehavior duplicateValueBehavior)
816      {
817        this.duplicateValueBehavior = duplicateValueBehavior;
818      }
819    
820    
821    
822      /**
823       * Indicates whether to strip off any illegal trailing spaces that may appear
824       * in LDIF records (e.g., after an entry DN or attribute value).  The LDIF
825       * specification strongly recommends that any value which legitimately
826       * contains trailing spaces be base64-encoded, and any spaces which appear
827       * after the end of non-base64-encoded values may therefore be considered
828       * invalid.  If any such trailing spaces are encountered in an LDIF record and
829       * they are not to be stripped, then an {@link LDIFException} will be thrown
830       * for that record.
831       * <BR><BR>
832       * Note that this applies only to spaces after the end of a value, and not to
833       * spaces which may appear at the end of a line for a value that is wrapped
834       * and continued on the next line.
835       *
836       * @return  {@code true} if illegal trailing spaces should be stripped off, or
837       *          {@code false} if LDIF records containing illegal trailing spaces
838       *          should be rejected.
839       *
840       * @deprecated  Use the {@link #getTrailingSpaceBehavior} method instead.
841       */
842      @Deprecated()
843      public boolean stripTrailingSpaces()
844      {
845        return (trailingSpaceBehavior == TrailingSpaceBehavior.STRIP);
846      }
847    
848    
849    
850      /**
851       * Specifies whether to strip off any illegal trailing spaces that may appear
852       * in LDIF records (e.g., after an entry DN or attribute value).  The LDIF
853       * specification strongly recommends that any value which legitimately
854       * contains trailing spaces be base64-encoded, and any spaces which appear
855       * after the end of non-base64-encoded values may therefore be considered
856       * invalid.  If any such trailing spaces are encountered in an LDIF record and
857       * they are not to be stripped, then an {@link LDIFException} will be thrown
858       * for that record.
859       * <BR><BR>
860       * Note that this applies only to spaces after the end of a value, and not to
861       * spaces which may appear at the end of a line for a value that is wrapped
862       * and continued on the next line.
863       *
864       * @param  stripTrailingSpaces  Indicates whether to strip off any illegal
865       *                              trailing spaces, or {@code false} if LDIF
866       *                              records containing them should be rejected.
867       *
868       * @deprecated  Use the {@link #setTrailingSpaceBehavior} method instead.
869       */
870      @Deprecated()
871      public void setStripTrailingSpaces(final boolean stripTrailingSpaces)
872      {
873        trailingSpaceBehavior = stripTrailingSpaces
874             ? TrailingSpaceBehavior.STRIP
875             : TrailingSpaceBehavior.REJECT;
876      }
877    
878    
879    
880      /**
881       * Retrieves the behavior that should be exhibited when encountering attribute
882       * values which are not base64-encoded but contain trailing spaces.  The LDIF
883       * specification strongly recommends that any value which legitimately
884       * contains trailing spaces be base64-encoded, but the LDAP SDK LDIF parser
885       * may be configured to automatically strip these spaces, to preserve them, or
886       * to reject any entry or change record containing them.
887       *
888       * @return  The behavior that should be exhibited when encountering attribute
889       *          values which are not base64-encoded but contain trailing spaces.
890       */
891      public TrailingSpaceBehavior getTrailingSpaceBehavior()
892      {
893        return trailingSpaceBehavior;
894      }
895    
896    
897    
898      /**
899       * Specifies the behavior that should be exhibited when encountering attribute
900       * values which are not base64-encoded but contain trailing spaces.  The LDIF
901       * specification strongly recommends that any value which legitimately
902       * contains trailing spaces be base64-encoded, but the LDAP SDK LDIF parser
903       * may be configured to automatically strip these spaces, to preserve them, or
904       * to reject any entry or change record containing them.
905       *
906       * @param  trailingSpaceBehavior  The behavior that should be exhibited when
907       *                                encountering attribute values which are not
908       *                                base64-encoded but contain trailing spaces.
909       */
910      public void setTrailingSpaceBehavior(
911                       final TrailingSpaceBehavior trailingSpaceBehavior)
912      {
913        this.trailingSpaceBehavior = trailingSpaceBehavior;
914      }
915    
916    
917    
918      /**
919       * Retrieves the base path that will be prepended to relative paths in order
920       * to obtain an absolute path.  This will only be used for "file:" URLs that
921       * have paths which do not begin with a slash.
922       *
923       * @return  The base path that will be prepended to relative paths in order to
924       *          obtain an absolute path.
925       */
926      public String getRelativeBasePath()
927      {
928        return relativeBasePath;
929      }
930    
931    
932    
933      /**
934       * Specifies the base path that will be prepended to relative paths in order
935       * to obtain an absolute path.  This will only be used for "file:" URLs that
936       * have paths which do not begin with a space.
937       *
938       * @param  relativeBasePath  The base path that will be prepended to relative
939       *                           paths in order to obtain an absolute path.
940       */
941      public void setRelativeBasePath(final String relativeBasePath)
942      {
943        setRelativeBasePath(new File(relativeBasePath));
944      }
945    
946    
947    
948      /**
949       * Specifies the base path that will be prepended to relative paths in order
950       * to obtain an absolute path.  This will only be used for "file:" URLs that
951       * have paths which do not begin with a space.
952       *
953       * @param  relativeBasePath  The base path that will be prepended to relative
954       *                           paths in order to obtain an absolute path.
955       */
956      public void setRelativeBasePath(final File relativeBasePath)
957      {
958        final String path = relativeBasePath.getAbsolutePath();
959        if (path.endsWith(File.separator))
960        {
961          this.relativeBasePath = path;
962        }
963        else
964        {
965          this.relativeBasePath = path + File.separator;
966        }
967      }
968    
969    
970    
971      /**
972       * Retrieves the schema that will be used when reading LDIF records, if
973       * defined.
974       *
975       * @return  The schema that will be used when reading LDIF records, or
976       *          {@code null} if no schema should be used and all attributes should
977       *          be treated as case-insensitive strings.
978       */
979      public Schema getSchema()
980      {
981        return schema;
982      }
983    
984    
985    
986      /**
987       * Specifies the schema that should be used when reading LDIF records.
988       *
989       * @param  schema  The schema that should be used when reading LDIF records,
990       *                 or {@code null} if no schema should be used and all
991       *                 attributes should be treated as case-insensitive strings.
992       */
993      public void setSchema(final Schema schema)
994      {
995        this.schema = schema;
996      }
997    
998    
999    
1000      /**
1001       * Reads a record from the LDIF source.  It may be either an entry or an LDIF
1002       * change record.
1003       *
1004       * @return  The record read from the LDIF source, or {@code null} if there are
1005       *          no more entries to be read.
1006       *
1007       * @throws  IOException  If a problem occurs while trying to read from the
1008       *                       LDIF source.
1009       *
1010       * @throws  LDIFException  If the data read could not be parsed as an entry or
1011       *                         an LDIF change record.
1012       */
1013      public LDIFRecord readLDIFRecord()
1014             throws IOException, LDIFException
1015      {
1016        if (isAsync())
1017        {
1018          return readLDIFRecordAsync();
1019        }
1020        else
1021        {
1022          return readLDIFRecordInternal();
1023        }
1024      }
1025    
1026    
1027    
1028      /**
1029       * Reads an entry from the LDIF source.
1030       *
1031       * @return  The entry read from the LDIF source, or {@code null} if there are
1032       *          no more entries to be read.
1033       *
1034       * @throws  IOException  If a problem occurs while attempting to read from the
1035       *                       LDIF source.
1036       *
1037       * @throws  LDIFException  If the data read could not be parsed as an entry.
1038       */
1039      public Entry readEntry()
1040             throws IOException, LDIFException
1041      {
1042        if (isAsync())
1043        {
1044          return readEntryAsync();
1045        }
1046        else
1047        {
1048          return readEntryInternal();
1049        }
1050      }
1051    
1052    
1053    
1054      /**
1055       * Reads an LDIF change record from the LDIF source.  The LDIF record must
1056       * have a changetype.
1057       *
1058       * @return  The change record read from the LDIF source, or {@code null} if
1059       *          there are no more records to be read.
1060       *
1061       * @throws  IOException  If a problem occurs while attempting to read from the
1062       *                       LDIF source.
1063       *
1064       * @throws  LDIFException  If the data read could not be parsed as an LDIF
1065       *                         change record.
1066       */
1067      public LDIFChangeRecord readChangeRecord()
1068             throws IOException, LDIFException
1069      {
1070        return readChangeRecord(false);
1071      }
1072    
1073    
1074    
1075      /**
1076       * Reads an LDIF change record from the LDIF source.  Optionally, if the LDIF
1077       * record does not have a changetype, then it may be assumed to be an add
1078       * change record.
1079       *
1080       * @param  defaultAdd  Indicates whether an LDIF record not containing a
1081       *                     changetype should be retrieved as an add change record.
1082       *                     If this is {@code false} and the record read does not
1083       *                     include a changetype, then an {@link LDIFException}
1084       *                     will be thrown.
1085       *
1086       * @return  The change record read from the LDIF source, or {@code null} if
1087       *          there are no more records to be read.
1088       *
1089       * @throws  IOException  If a problem occurs while attempting to read from the
1090       *                       LDIF source.
1091       *
1092       * @throws  LDIFException  If the data read could not be parsed as an LDIF
1093       *                         change record.
1094       */
1095      public LDIFChangeRecord readChangeRecord(final boolean defaultAdd)
1096             throws IOException, LDIFException
1097      {
1098        if (isAsync())
1099        {
1100          return readChangeRecordAsync(defaultAdd);
1101        }
1102        else
1103        {
1104          return readChangeRecordInternal(defaultAdd);
1105        }
1106      }
1107    
1108    
1109    
1110      /**
1111       * Reads the next {@code LDIFRecord}, which was read and parsed by a different
1112       * thread.
1113       *
1114       * @return  The next parsed record or {@code null} if there are no more
1115       *          records to read.
1116       *
1117       * @throws IOException  If IOException was thrown when reading or parsing
1118       *                      the record.
1119       *
1120       * @throws LDIFException If LDIFException was thrown parsing the record.
1121       */
1122      private LDIFRecord readLDIFRecordAsync()
1123              throws IOException, LDIFException
1124      {
1125        final Result<UnparsedLDIFRecord, LDIFRecord> result =
1126             readLDIFRecordResultAsync();
1127        if (result == null)
1128        {
1129          return null;
1130        }
1131        else
1132        {
1133          return result.getOutput();
1134        }
1135      }
1136    
1137    
1138    
1139      /**
1140       * Reads an entry asynchronously from the LDIF source.
1141       *
1142       * @return The entry read from the LDIF source, or {@code null} if there are
1143       *         no more entries to be read.
1144       *
1145       * @throws IOException   If a problem occurs while attempting to read from the
1146       *                       LDIF source.
1147       * @throws LDIFException If the data read could not be parsed as an entry.
1148       */
1149      private Entry readEntryAsync()
1150              throws IOException, LDIFException
1151      {
1152        Result<UnparsedLDIFRecord, LDIFRecord> result = null;
1153        LDIFRecord record = null;
1154        while (record == null)
1155        {
1156          result = readLDIFRecordResultAsync();
1157          if (result == null)
1158          {
1159            return null;
1160          }
1161    
1162          record = result.getOutput();
1163    
1164          // This is a special value that means we should skip this Entry.  We have
1165          // to use something different than null because null means EOF.
1166          if (record == SKIP_ENTRY)
1167          {
1168            record = null;
1169          }
1170        }
1171    
1172        if (!(record instanceof Entry))
1173        {
1174          try
1175          {
1176            // Some LDIFChangeRecord can be converted to an Entry.  This is really
1177            // an edge case though.
1178            return ((LDIFChangeRecord)record).toEntry();
1179          }
1180          catch (LDIFException e)
1181          {
1182            debugException(e);
1183            final long firstLineNumber = result.getInput().getFirstLineNumber();
1184            throw new LDIFException(e.getExceptionMessage(),
1185                                    firstLineNumber, true, e);
1186          }
1187        }
1188    
1189        return (Entry) record;
1190      }
1191    
1192    
1193    
1194      /**
1195       * Reads an LDIF change record from the LDIF source asynchronously.
1196       * Optionally, if the LDIF record does not have a changetype, then it may be
1197       * assumed to be an add change record.
1198       *
1199       * @param defaultAdd Indicates whether an LDIF record not containing a
1200       *                   changetype should be retrieved as an add change record.
1201       *                   If this is {@code false} and the record read does not
1202       *                   include a changetype, then an {@link LDIFException} will
1203       *                   be thrown.
1204       *
1205       * @return The change record read from the LDIF source, or {@code null} if
1206       *         there are no more records to be read.
1207       *
1208       * @throws IOException   If a problem occurs while attempting to read from the
1209       *                       LDIF source.
1210       * @throws LDIFException If the data read could not be parsed as an LDIF
1211       *                       change record.
1212       */
1213      private LDIFChangeRecord readChangeRecordAsync(final boolean defaultAdd)
1214              throws IOException, LDIFException
1215      {
1216        final Result<UnparsedLDIFRecord, LDIFRecord> result =
1217             readLDIFRecordResultAsync();
1218        if (result == null)
1219        {
1220          return null;
1221        }
1222    
1223        final LDIFRecord record = result.getOutput();
1224        if (record instanceof LDIFChangeRecord)
1225        {
1226          return (LDIFChangeRecord) record;
1227        }
1228        else if (record instanceof Entry)
1229        {
1230          if (defaultAdd)
1231          {
1232            return new LDIFAddChangeRecord((Entry) record);
1233          }
1234          else
1235          {
1236            final long firstLineNumber = result.getInput().getFirstLineNumber();
1237            throw new LDIFException(
1238                 ERR_READ_NOT_CHANGE_RECORD.get(firstLineNumber), firstLineNumber,
1239                 true);
1240          }
1241        }
1242    
1243        throw new AssertionError("LDIFRecords must either be an Entry or an " +
1244                                 "LDIFChangeRecord");
1245      }
1246    
1247    
1248    
1249      /**
1250       * Reads the next LDIF record, which was read and parsed asynchronously by
1251       * separate threads.
1252       *
1253       * @return  The next LDIF record or {@code null} if there are no more records.
1254       *
1255       * @throws  IOException  If a problem occurs while attempting to read from the
1256       *                       LDIF source.
1257       *
1258       * @throws  LDIFException  If the data read could not be parsed as an entry.
1259       */
1260      private Result<UnparsedLDIFRecord, LDIFRecord> readLDIFRecordResultAsync()
1261              throws IOException, LDIFException
1262      {
1263        Result<UnparsedLDIFRecord, LDIFRecord> result = null;
1264    
1265        // If the asynchronous reading and parsing is complete, then we don't have
1266        // to block waiting for the next record to show up on the queue.  If there
1267        // isn't a record there, then return null (EOF) right away.
1268        if (asyncParsingComplete.get())
1269        {
1270          result = asyncParsedRecords.poll();
1271        }
1272        else
1273        {
1274          try
1275          {
1276            // We probably could just do a asyncParsedRecords.take() here, but
1277            // there are some edge case error scenarios where
1278            // asyncParsingComplete might be set without a special EOF sentinel
1279            // Result enqueued.  So to guard against this, we have a very cautious
1280            // polling interval of 1 second.  During normal processing, we never
1281            // have to wait for this to expire, when there is something to do
1282            // (like shutdown).
1283            while ((result == null) && (!asyncParsingComplete.get()))
1284            {
1285              result = asyncParsedRecords.poll(1, TimeUnit.SECONDS);
1286            }
1287    
1288            // There's a very small chance that we missed the value, so double-check
1289            if (result == null)
1290            {
1291              result = asyncParsedRecords.poll();
1292            }
1293          }
1294          catch (InterruptedException e)
1295          {
1296            debugException(e);
1297            throw new IOException(getExceptionMessage(e));
1298          }
1299        }
1300        if (result == null)
1301        {
1302          return null;
1303        }
1304    
1305        rethrow(result.getFailureCause());
1306    
1307        // Check if we reached the end of the input
1308        final UnparsedLDIFRecord unparsedRecord = result.getInput();
1309        if (unparsedRecord.isEOF())
1310        {
1311          // This might have been set already by the LineReaderThread, but
1312          // just in case it hasn't gotten to it yet, do so here.
1313          asyncParsingComplete.set(true);
1314    
1315          // Enqueue this EOF result again for any other thread that might be
1316          // blocked in asyncParsedRecords.take() even though having multiple
1317          // threads call this method concurrently breaks the contract of this
1318          // class.
1319          try
1320          {
1321            asyncParsedRecords.put(result);
1322          }
1323          catch (InterruptedException e)
1324          {
1325            // We shouldn't ever get interrupted because the put won't ever block.
1326            // Once we are done reading, this is the only item left in the queue,
1327            // so we should always be able to re-enqueue it.
1328            debugException(e);
1329          }
1330          return null;
1331        }
1332    
1333        return result;
1334      }
1335    
1336    
1337    
1338      /**
1339       * Indicates whether this LDIF reader was constructed to perform asynchronous
1340       * processing.
1341       *
1342       * @return  {@code true} if this LDIFReader was constructed to perform
1343       *          asynchronous processing, or {@code false} if not.
1344       */
1345      private boolean isAsync()
1346      {
1347        return isAsync;
1348      }
1349    
1350    
1351    
1352      /**
1353       * If not {@code null}, rethrows the specified Throwable as either an
1354       * IOException or LDIFException.
1355       *
1356       * @param t  The exception to rethrow.  If it's {@code null}, then nothing
1357       *           is thrown.
1358       *
1359       * @throws IOException   If t is an IOException or a checked Exception that
1360       *                       is not an LDIFException.
1361       * @throws LDIFException  If t is an LDIFException.
1362       */
1363      static void rethrow(final Throwable t)
1364             throws IOException, LDIFException
1365      {
1366        if (t == null)
1367        {
1368          return;
1369        }
1370    
1371        if (t instanceof IOException)
1372        {
1373          throw (IOException) t;
1374        }
1375        else if (t instanceof LDIFException)
1376        {
1377          throw (LDIFException) t;
1378        }
1379        else if (t instanceof RuntimeException)
1380        {
1381          throw (RuntimeException) t;
1382        }
1383        else if (t instanceof Error)
1384        {
1385          throw (Error) t;
1386        }
1387        else
1388        {
1389          throw new IOException(getExceptionMessage(t));
1390        }
1391      }
1392    
1393    
1394    
1395      /**
1396       * Reads a record from the LDIF source.  It may be either an entry or an LDIF
1397       * change record.
1398       *
1399       * @return The record read from the LDIF source, or {@code null} if there are
1400       *         no more entries to be read.
1401       *
1402       * @throws IOException   If a problem occurs while trying to read from the
1403       *                       LDIF source.
1404       * @throws LDIFException If the data read could not be parsed as an entry or
1405       *                       an LDIF change record.
1406       */
1407      private LDIFRecord readLDIFRecordInternal()
1408           throws IOException, LDIFException
1409      {
1410        final UnparsedLDIFRecord unparsedRecord = readUnparsedRecord();
1411        return decodeRecord(unparsedRecord, relativeBasePath);
1412      }
1413    
1414    
1415    
1416      /**
1417       * Reads an entry from the LDIF source.
1418       *
1419       * @return The entry read from the LDIF source, or {@code null} if there are
1420       *         no more entries to be read.
1421       *
1422       * @throws IOException   If a problem occurs while attempting to read from the
1423       *                       LDIF source.
1424       * @throws LDIFException If the data read could not be parsed as an entry.
1425       */
1426      private Entry readEntryInternal()
1427           throws IOException, LDIFException
1428      {
1429        Entry e = null;
1430        while (e == null)
1431        {
1432          final UnparsedLDIFRecord unparsedRecord = readUnparsedRecord();
1433          if (unparsedRecord.isEOF())
1434          {
1435            return null;
1436          }
1437    
1438          e = decodeEntry(unparsedRecord, relativeBasePath);
1439          debugLDIFRead(e);
1440    
1441          if (entryTranslator != null)
1442          {
1443            e = entryTranslator.translate(e, unparsedRecord.getFirstLineNumber());
1444          }
1445        }
1446        return e;
1447      }
1448    
1449    
1450    
1451      /**
1452       * Reads an LDIF change record from the LDIF source.  Optionally, if the LDIF
1453       * record does not have a changetype, then it may be assumed to be an add
1454       * change record.
1455       *
1456       * @param defaultAdd Indicates whether an LDIF record not containing a
1457       *                   changetype should be retrieved as an add change record.
1458       *                   If this is {@code false} and the record read does not
1459       *                   include a changetype, then an {@link LDIFException} will
1460       *                   be thrown.
1461       *
1462       * @return The change record read from the LDIF source, or {@code null} if
1463       *         there are no more records to be read.
1464       *
1465       * @throws IOException   If a problem occurs while attempting to read from the
1466       *                       LDIF source.
1467       * @throws LDIFException If the data read could not be parsed as an LDIF
1468       *                       change record.
1469       */
1470      private LDIFChangeRecord readChangeRecordInternal(final boolean defaultAdd)
1471           throws IOException, LDIFException
1472      {
1473        final UnparsedLDIFRecord unparsedRecord = readUnparsedRecord();
1474        if (unparsedRecord.isEOF())
1475        {
1476          return null;
1477        }
1478    
1479        final LDIFChangeRecord r =
1480             decodeChangeRecord(unparsedRecord, relativeBasePath, defaultAdd);
1481        debugLDIFRead(r);
1482        return r;
1483      }
1484    
1485    
1486    
1487      /**
1488       * Reads a record (either an entry or a change record) from the LDIF source
1489       * and places it in the line list.
1490       *
1491       * @return  The line number for the first line of the entry that was read.
1492       *
1493       * @throws  IOException  If a problem occurs while attempting to read from the
1494       *                       LDIF source.
1495       *
1496       * @throws  LDIFException  If the data read could not be parsed as a valid
1497       *                         LDIF record.
1498       */
1499      private UnparsedLDIFRecord readUnparsedRecord()
1500             throws IOException, LDIFException
1501      {
1502        final ArrayList<StringBuilder> lineList = new ArrayList<StringBuilder>(20);
1503        boolean lastWasComment = false;
1504        long firstLineNumber = lineNumberCounter + 1;
1505        while (true)
1506        {
1507          final String line = reader.readLine();
1508          lineNumberCounter++;
1509    
1510          if (line == null)
1511          {
1512            // We've hit the end of the LDIF source.  If we haven't read any entry
1513            // data, then return null.  Otherwise, the last entry wasn't followed by
1514            // a blank line, which is OK, and we should decode that entry.
1515            if (lineList.isEmpty())
1516            {
1517              return new UnparsedLDIFRecord(new ArrayList<StringBuilder>(0),
1518                   duplicateValueBehavior, trailingSpaceBehavior, schema, -1);
1519            }
1520            else
1521            {
1522              break;
1523            }
1524          }
1525    
1526          if (line.length() == 0)
1527          {
1528            // It's a blank line.  If we have read entry data, then this signals the
1529            // end of the entry.  Otherwise, it's an extra space between entries,
1530            // which is OK.
1531            lastWasComment = false;
1532            if (lineList.isEmpty())
1533            {
1534              firstLineNumber++;
1535              continue;
1536            }
1537            else
1538            {
1539              break;
1540            }
1541          }
1542    
1543          if (line.charAt(0) == ' ')
1544          {
1545            // The line starts with a space, which means that it must be a
1546            // continuation of the previous line.  This is true even if the last
1547            // line was a comment.
1548            if (lastWasComment)
1549            {
1550              // What we've read is part of a comment, so we don't care about its
1551              // content.
1552            }
1553            else if (lineList.isEmpty())
1554            {
1555              throw new LDIFException(
1556                             ERR_READ_UNEXPECTED_FIRST_SPACE.get(lineNumberCounter),
1557                             lineNumberCounter, false);
1558            }
1559            else
1560            {
1561              lineList.get(lineList.size() - 1).append(line.substring(1));
1562              lastWasComment = false;
1563            }
1564          }
1565          else if (line.charAt(0) == '#')
1566          {
1567            lastWasComment = true;
1568          }
1569          else
1570          {
1571            // We want to make sure that we skip over the "version:" line if it
1572            // exists, but that should only occur at the beginning of an entry where
1573            // it can't be confused with a possible "version" attribute.
1574            if (lineList.isEmpty() && line.startsWith("version:"))
1575            {
1576              lastWasComment = true;
1577            }
1578            else
1579            {
1580              lineList.add(new StringBuilder(line));
1581              lastWasComment = false;
1582            }
1583          }
1584        }
1585    
1586        return new UnparsedLDIFRecord(lineList, duplicateValueBehavior,
1587             trailingSpaceBehavior, schema, firstLineNumber);
1588      }
1589    
1590    
1591    
1592      /**
1593       * Decodes the provided set of LDIF lines as an entry.  The provided set of
1594       * lines must contain exactly one entry.  Long lines may be wrapped as per the
1595       * LDIF specification, and it is acceptable to have one or more blank lines
1596       * following the entry.
1597       *
1598       * @param  ldifLines  The set of lines that comprise the LDIF representation
1599       *                    of the entry.  It must not be {@code null} or empty.
1600       *
1601       * @return  The entry read from LDIF.
1602       *
1603       * @throws  LDIFException  If the provided LDIF data cannot be decoded as an
1604       *                         entry.
1605       */
1606      public static Entry decodeEntry(final String... ldifLines)
1607             throws LDIFException
1608      {
1609        final Entry e = decodeEntry(prepareRecord(DuplicateValueBehavior.STRIP,
1610             TrailingSpaceBehavior.REJECT, null, ldifLines),
1611             DEFAULT_RELATIVE_BASE_PATH);
1612        debugLDIFRead(e);
1613        return e;
1614      }
1615    
1616    
1617    
1618      /**
1619       * Decodes the provided set of LDIF lines as an entry.  The provided set of
1620       * lines must contain exactly one entry.  Long lines may be wrapped as per the
1621       * LDIF specification, and it is acceptable to have one or more blank lines
1622       * following the entry.
1623       *
1624       * @param  ignoreDuplicateValues  Indicates whether to ignore duplicate
1625       *                                attribute values encountered while parsing.
1626       * @param  schema                 The schema to use when parsing the record,
1627       *                                if applicable.
1628       * @param  ldifLines              The set of lines that comprise the LDIF
1629       *                                representation of the entry.  It must not be
1630       *                                {@code null} or empty.
1631       *
1632       * @return  The entry read from LDIF.
1633       *
1634       * @throws  LDIFException  If the provided LDIF data cannot be decoded as an
1635       *                         entry.
1636       */
1637      public static Entry decodeEntry(final boolean ignoreDuplicateValues,
1638                                      final Schema schema,
1639                                      final String... ldifLines)
1640             throws LDIFException
1641      {
1642        final Entry e = decodeEntry(prepareRecord(
1643                  (ignoreDuplicateValues
1644                       ? DuplicateValueBehavior.STRIP
1645                       : DuplicateValueBehavior.REJECT),
1646                  TrailingSpaceBehavior.REJECT, schema, ldifLines),
1647             DEFAULT_RELATIVE_BASE_PATH);
1648        debugLDIFRead(e);
1649        return e;
1650      }
1651    
1652    
1653    
1654      /**
1655       * Decodes the provided set of LDIF lines as an LDIF change record.  The
1656       * provided set of lines must contain exactly one change record and it must
1657       * include a changetype.  Long lines may be wrapped as per the LDIF
1658       * specification, and it is acceptable to have one or more blank lines
1659       * following the entry.
1660       *
1661       * @param  ldifLines  The set of lines that comprise the LDIF representation
1662       *                    of the change record.  It must not be {@code null} or
1663       *                    empty.
1664       *
1665       * @return  The change record read from LDIF.
1666       *
1667       * @throws  LDIFException  If the provided LDIF data cannot be decoded as a
1668       *                         change record.
1669       */
1670      public static LDIFChangeRecord decodeChangeRecord(final String... ldifLines)
1671             throws LDIFException
1672      {
1673        return decodeChangeRecord(false, ldifLines);
1674      }
1675    
1676    
1677    
1678      /**
1679       * Decodes the provided set of LDIF lines as an LDIF change record.  The
1680       * provided set of lines must contain exactly one change record.  Long lines
1681       * may be wrapped as per the LDIF specification, and it is acceptable to have
1682       * one or more blank lines following the entry.
1683       *
1684       * @param  defaultAdd  Indicates whether an LDIF record not containing a
1685       *                     changetype should be retrieved as an add change record.
1686       *                     If this is {@code false} and the record read does not
1687       *                     include a changetype, then an {@link LDIFException}
1688       *                     will be thrown.
1689       * @param  ldifLines  The set of lines that comprise the LDIF representation
1690       *                    of the change record.  It must not be {@code null} or
1691       *                    empty.
1692       *
1693       * @return  The change record read from LDIF.
1694       *
1695       * @throws  LDIFException  If the provided LDIF data cannot be decoded as a
1696       *                         change record.
1697       */
1698      public static LDIFChangeRecord decodeChangeRecord(final boolean defaultAdd,
1699                                                        final String... ldifLines)
1700             throws LDIFException
1701      {
1702        final LDIFChangeRecord r =
1703             decodeChangeRecord(
1704                  prepareRecord(DuplicateValueBehavior.STRIP,
1705                       TrailingSpaceBehavior.REJECT, null, ldifLines),
1706                  DEFAULT_RELATIVE_BASE_PATH, defaultAdd);
1707        debugLDIFRead(r);
1708        return r;
1709      }
1710    
1711    
1712    
1713      /**
1714       * Decodes the provided set of LDIF lines as an LDIF change record.  The
1715       * provided set of lines must contain exactly one change record.  Long lines
1716       * may be wrapped as per the LDIF specification, and it is acceptable to have
1717       * one or more blank lines following the entry.
1718       *
1719       * @param  ignoreDuplicateValues  Indicates whether to ignore duplicate
1720       *                                attribute values encountered while parsing.
1721       * @param  schema                 The schema to use when processing the change
1722       *                                record, or {@code null} if no schema should
1723       *                                be used and all values should be treated as
1724       *                                case-insensitive strings.
1725       * @param  defaultAdd             Indicates whether an LDIF record not
1726       *                                containing a changetype should be retrieved
1727       *                                as an add change record.  If this is
1728       *                                {@code false} and the record read does not
1729       *                                include a changetype, then an
1730       *                                {@link LDIFException} will be thrown.
1731       * @param  ldifLines              The set of lines that comprise the LDIF
1732       *                                representation of the change record.  It
1733       *                                must not be {@code null} or empty.
1734       *
1735       * @return  The change record read from LDIF.
1736       *
1737       * @throws  LDIFException  If the provided LDIF data cannot be decoded as a
1738       *                         change record.
1739       */
1740      public static LDIFChangeRecord decodeChangeRecord(
1741                                          final boolean ignoreDuplicateValues,
1742                                          final Schema schema,
1743                                          final boolean defaultAdd,
1744                                          final String... ldifLines)
1745             throws LDIFException
1746      {
1747        final LDIFChangeRecord r = decodeChangeRecord(
1748             prepareRecord(
1749                  (ignoreDuplicateValues
1750                       ? DuplicateValueBehavior.STRIP
1751                       : DuplicateValueBehavior.REJECT),
1752                  TrailingSpaceBehavior.REJECT, schema, ldifLines),
1753             DEFAULT_RELATIVE_BASE_PATH, defaultAdd);
1754        debugLDIFRead(r);
1755        return r;
1756      }
1757    
1758    
1759    
1760      /**
1761       * Parses the provided set of lines into a list of {@code StringBuilder}
1762       * objects suitable for decoding into an entry or LDIF change record.
1763       * Comments will be ignored and wrapped lines will be unwrapped.
1764       *
1765       * @param  duplicateValueBehavior  The behavior that should be exhibited if
1766       *                                 the LDIF reader encounters an entry with
1767       *                                 duplicate values.
1768       * @param  trailingSpaceBehavior   The behavior that should be exhibited when
1769       *                                 encountering attribute values which are not
1770       *                                 base64-encoded but contain trailing spaces.
1771       * @param  schema                  The schema to use when parsing the record,
1772       *                                 if applicable.
1773       * @param  ldifLines               The set of lines that comprise the record
1774       *                                 to decode.  It must not be {@code null} or
1775       *                                 empty.
1776       *
1777       * @return  The prepared list of {@code StringBuilder} objects ready to be
1778       *          decoded.
1779       *
1780       * @throws  LDIFException  If the provided lines do not contain valid LDIF
1781       *                         content.
1782       */
1783      private static UnparsedLDIFRecord prepareRecord(
1784                          final DuplicateValueBehavior duplicateValueBehavior,
1785                          final TrailingSpaceBehavior trailingSpaceBehavior,
1786                          final Schema schema, final String... ldifLines)
1787              throws LDIFException
1788      {
1789        ensureNotNull(ldifLines);
1790        ensureFalse(ldifLines.length == 0,
1791                    "LDIFReader.prepareRecord.ldifLines must not be empty.");
1792    
1793        boolean lastWasComment = false;
1794        final ArrayList<StringBuilder> lineList =
1795             new ArrayList<StringBuilder>(ldifLines.length);
1796        for (int i=0; i < ldifLines.length; i++)
1797        {
1798          final String line = ldifLines[i];
1799          if (line.length() == 0)
1800          {
1801            // This is only acceptable if there are no more non-empty lines in the
1802            // array.
1803            for (int j=i+1; j < ldifLines.length; j++)
1804            {
1805              if (ldifLines[j].length() > 0)
1806              {
1807                throw new LDIFException(ERR_READ_UNEXPECTED_BLANK.get(i), i, true,
1808                                        ldifLines, null);
1809              }
1810    
1811              // If we've gotten here, then we know that we're at the end of the
1812              // entry.  If we have read data, then we can decode it as an entry.
1813              // Otherwise, there was no real data in the provided LDIF lines.
1814              if (lineList.isEmpty())
1815              {
1816                throw new LDIFException(ERR_READ_ONLY_BLANKS.get(), 0, true,
1817                                        ldifLines, null);
1818              }
1819              else
1820              {
1821                return new UnparsedLDIFRecord(lineList, duplicateValueBehavior,
1822                     trailingSpaceBehavior, schema, 0);
1823              }
1824            }
1825          }
1826    
1827          if (line.charAt(0) == ' ')
1828          {
1829            if (i > 0)
1830            {
1831              if (! lastWasComment)
1832              {
1833                lineList.get(lineList.size() - 1).append(line.substring(1));
1834              }
1835            }
1836            else
1837            {
1838              throw new LDIFException(
1839                             ERR_READ_UNEXPECTED_FIRST_SPACE_NO_NUMBER.get(), 0,
1840                             true, ldifLines, null);
1841            }
1842          }
1843          else if (line.charAt(0) == '#')
1844          {
1845            lastWasComment = true;
1846          }
1847          else
1848          {
1849            lineList.add(new StringBuilder(line));
1850            lastWasComment = false;
1851          }
1852        }
1853    
1854        if (lineList.isEmpty())
1855        {
1856          throw new LDIFException(ERR_READ_NO_DATA.get(), 0, true, ldifLines, null);
1857        }
1858        else
1859        {
1860          return new UnparsedLDIFRecord(lineList, duplicateValueBehavior,
1861               trailingSpaceBehavior, schema, 0);
1862        }
1863      }
1864    
1865    
1866    
1867      /**
1868       * Decodes the unparsed record that was read from the LDIF source.  It may be
1869       * either an entry or an LDIF change record.
1870       *
1871       * @param  unparsedRecord    The unparsed LDIF record that was read from the
1872       *                           input.  It must not be {@code null} or empty.
1873       * @param  relativeBasePath  The base path that will be prepended to relative
1874       *                           paths in order to obtain an absolute path.
1875       *
1876       * @return  The parsed record, or {@code null} if there are no more entries to
1877       *          be read.
1878       *
1879       * @throws  LDIFException  If the data read could not be parsed as an entry or
1880       *                         an LDIF change record.
1881       */
1882      private static LDIFRecord decodeRecord(
1883                                     final UnparsedLDIFRecord unparsedRecord,
1884                                     final String relativeBasePath)
1885           throws LDIFException
1886      {
1887        // If there was an error reading from the input, then we rethrow it here.
1888        final Exception readError = unparsedRecord.getFailureCause();
1889        if (readError != null)
1890        {
1891          if (readError instanceof LDIFException)
1892          {
1893            // If the error was an LDIFException, which will normally be the case,
1894            // then rethrow it with all of the same state.  We could just
1895            //   throw (LDIFException) readError;
1896            // but that's considered bad form.
1897            final LDIFException ldifEx = (LDIFException) readError;
1898            throw new LDIFException(ldifEx.getMessage(),
1899                                    ldifEx.getLineNumber(),
1900                                    ldifEx.mayContinueReading(),
1901                                    ldifEx.getDataLines(),
1902                                    ldifEx.getCause());
1903          }
1904          else
1905          {
1906            throw new LDIFException(getExceptionMessage(readError),
1907                                    -1, true, readError);
1908          }
1909        }
1910    
1911        if (unparsedRecord.isEOF())
1912        {
1913          return null;
1914        }
1915    
1916        final ArrayList<StringBuilder> lineList = unparsedRecord.getLineList();
1917        if (unparsedRecord.getLineList() == null)
1918        {
1919          return null;  // We can get here if there was an error reading the lines.
1920        }
1921    
1922        final LDIFRecord r;
1923        if ((lineList.size() > 1) &&
1924            toLowerCase(lineList.get(1).toString()).startsWith("changetype:"))
1925        {
1926          r = decodeChangeRecord(unparsedRecord, relativeBasePath, false);
1927        }
1928        else
1929        {
1930          r = decodeEntry(unparsedRecord, relativeBasePath);
1931        }
1932    
1933        debugLDIFRead(r);
1934        return r;
1935      }
1936    
1937    
1938    
1939      /**
1940       * Decodes the provided set of LDIF lines as an entry.  The provided list must
1941       * not contain any blank lines or comments, and lines are not allowed to be
1942       * wrapped.
1943       *
1944       * @param  unparsedRecord   The unparsed LDIF record that was read from the
1945       *                          input.  It must not be {@code null} or empty.
1946       * @param  relativeBasePath  The base path that will be prepended to relative
1947       *                           paths in order to obtain an absolute path.
1948       *
1949       * @return  The entry read from LDIF.
1950       *
1951       * @throws  LDIFException  If the provided LDIF data cannot be read as an
1952       *                         entry.
1953       */
1954      private static Entry decodeEntry(final UnparsedLDIFRecord unparsedRecord,
1955                                       final String relativeBasePath)
1956              throws LDIFException
1957      {
1958        final ArrayList<StringBuilder> ldifLines = unparsedRecord.getLineList();
1959        final long firstLineNumber = unparsedRecord.getFirstLineNumber();
1960    
1961        final Iterator<StringBuilder> iterator = ldifLines.iterator();
1962    
1963        // The first line must be the entry DN, and it must start with "dn:".
1964        final StringBuilder line = iterator.next();
1965        handleTrailingSpaces(line, null, firstLineNumber,
1966             unparsedRecord.getTrailingSpaceBehavior());
1967        final int colonPos = line.indexOf(":");
1968        if ((colonPos < 0) ||
1969            (! line.substring(0, colonPos).equalsIgnoreCase("dn")))
1970        {
1971          throw new LDIFException(
1972                         ERR_READ_DN_LINE_DOESNT_START_WITH_DN.get(firstLineNumber),
1973                         firstLineNumber, true, ldifLines, null);
1974        }
1975    
1976        final String dn;
1977        final int length = line.length();
1978        if (length == (colonPos+1))
1979        {
1980          // The colon was the last character on the line.  This is acceptable and
1981          // indicates that the entry has the null DN.
1982          dn = "";
1983        }
1984        else if (line.charAt(colonPos+1) == ':')
1985        {
1986          // Skip over any spaces leading up to the value, and then the rest of the
1987          // string is the base64-encoded DN.
1988          int pos = colonPos+2;
1989          while ((pos < length) && (line.charAt(pos) == ' '))
1990          {
1991            pos++;
1992          }
1993    
1994          try
1995          {
1996            final byte[] dnBytes = Base64.decode(line.substring(pos));
1997            dn = new String(dnBytes, "UTF-8");
1998          }
1999          catch (final ParseException pe)
2000          {
2001            debugException(pe);
2002            throw new LDIFException(
2003                           ERR_READ_CANNOT_BASE64_DECODE_DN.get(firstLineNumber,
2004                                                                pe.getMessage()),
2005                           firstLineNumber, true, ldifLines, pe);
2006          }
2007          catch (final Exception e)
2008          {
2009            debugException(e);
2010            throw new LDIFException(
2011                           ERR_READ_CANNOT_BASE64_DECODE_DN.get(firstLineNumber, e),
2012                           firstLineNumber, true, ldifLines, e);
2013          }
2014        }
2015        else
2016        {
2017          // Skip over any spaces leading up to the value, and then the rest of the
2018          // string is the DN.
2019          int pos = colonPos+1;
2020          while ((pos < length) && (line.charAt(pos) == ' '))
2021          {
2022            pos++;
2023          }
2024    
2025          dn = line.substring(pos);
2026        }
2027    
2028    
2029        // The remaining lines must be the attributes for the entry.  However, we
2030        // will allow the case in which an entry does not have any attributes, to be
2031        // able to support reading search result entries in which no attributes were
2032        // returned.
2033        if (! iterator.hasNext())
2034        {
2035          return new Entry(dn, unparsedRecord.getSchema());
2036        }
2037    
2038        return new Entry(dn, unparsedRecord.getSchema(),
2039             parseAttributes(dn, unparsedRecord.getDuplicateValueBehavior(),
2040                  unparsedRecord.getTrailingSpaceBehavior(),
2041                  unparsedRecord.getSchema(), ldifLines, iterator, relativeBasePath,
2042                  firstLineNumber));
2043      }
2044    
2045    
2046    
2047      /**
2048       * Decodes the provided set of LDIF lines as a change record.  The provided
2049       * list must not contain any blank lines or comments, and lines are not
2050       * allowed to be wrapped.
2051       *
2052       * @param  unparsedRecord    The unparsed LDIF record that was read from the
2053       *                           input.  It must not be {@code null} or empty.
2054       * @param  relativeBasePath  The base path that will be prepended to relative
2055       *                           paths in order to obtain an absolute path.
2056       * @param  defaultAdd        Indicates whether an LDIF record not containing a
2057       *                           changetype should be retrieved as an add change
2058       *                           record.  If this is {@code false} and the record
2059       *                           read does not include a changetype, then an
2060       *                           {@link LDIFException} will be thrown.
2061       *
2062       * @return  The change record read from LDIF.
2063       *
2064       * @throws  LDIFException  If the provided LDIF data cannot be decoded as a
2065       *                         change record.
2066       */
2067      private static LDIFChangeRecord decodeChangeRecord(
2068                                           final UnparsedLDIFRecord unparsedRecord,
2069                                           final String relativeBasePath,
2070                                           final boolean defaultAdd)
2071              throws LDIFException
2072      {
2073        final ArrayList<StringBuilder> ldifLines = unparsedRecord.getLineList();
2074        final long firstLineNumber = unparsedRecord.getFirstLineNumber();
2075    
2076        final Iterator<StringBuilder> iterator = ldifLines.iterator();
2077    
2078        // The first line must be the entry DN, and it must start with "dn:".
2079        StringBuilder line = iterator.next();
2080        handleTrailingSpaces(line, null, firstLineNumber,
2081             unparsedRecord.getTrailingSpaceBehavior());
2082        int colonPos = line.indexOf(":");
2083        if ((colonPos < 0) ||
2084            (! line.substring(0, colonPos).equalsIgnoreCase("dn")))
2085        {
2086          throw new LDIFException(
2087               ERR_READ_CR_DN_LINE_DOESNT_START_WITH_DN.get(firstLineNumber),
2088               firstLineNumber, true, ldifLines, null);
2089        }
2090    
2091        final String dn;
2092        int length = line.length();
2093        if (length == (colonPos+1))
2094        {
2095          // The colon was the last character on the line.  This is acceptable and
2096          // indicates that the entry has the null DN.
2097          dn = "";
2098        }
2099        else if (line.charAt(colonPos+1) == ':')
2100        {
2101          // Skip over any spaces leading up to the value, and then the rest of the
2102          // string is the base64-encoded DN.
2103          int pos = colonPos+2;
2104          while ((pos < length) && (line.charAt(pos) == ' '))
2105          {
2106            pos++;
2107          }
2108    
2109          try
2110          {
2111            final byte[] dnBytes = Base64.decode(line.substring(pos));
2112            dn = new String(dnBytes, "UTF-8");
2113          }
2114          catch (final ParseException pe)
2115          {
2116            debugException(pe);
2117            throw new LDIFException(
2118                           ERR_READ_CR_CANNOT_BASE64_DECODE_DN.get(firstLineNumber,
2119                                                                   pe.getMessage()),
2120                           firstLineNumber, true, ldifLines, pe);
2121          }
2122          catch (final Exception e)
2123          {
2124            debugException(e);
2125            throw new LDIFException(
2126                           ERR_READ_CR_CANNOT_BASE64_DECODE_DN.get(firstLineNumber,
2127                                                                   e),
2128                           firstLineNumber, true, ldifLines, e);
2129          }
2130        }
2131        else
2132        {
2133          // Skip over any spaces leading up to the value, and then the rest of the
2134          // string is the DN.
2135          int pos = colonPos+1;
2136          while ((pos < length) && (line.charAt(pos) == ' '))
2137          {
2138            pos++;
2139          }
2140    
2141          dn = line.substring(pos);
2142        }
2143    
2144    
2145        // The second line must be the change type, and it must start with
2146        // "changetype:".
2147        if (! iterator.hasNext())
2148        {
2149          throw new LDIFException(ERR_READ_CR_TOO_SHORT.get(firstLineNumber),
2150                                  firstLineNumber, true, ldifLines, null);
2151        }
2152    
2153    
2154        // If defaultAdd is true, then the change record may or may not have a
2155        // changetype.  If it is false, then the record must have a changetype.
2156        final String changeType;
2157        if (defaultAdd &&
2158            (! toLowerCase(ldifLines.get(1).toString()).startsWith("changetype:")))
2159        {
2160          changeType = "add";
2161        }
2162        else
2163        {
2164          line = iterator.next();
2165          handleTrailingSpaces(line, dn, firstLineNumber,
2166               unparsedRecord.getTrailingSpaceBehavior());
2167          colonPos = line.indexOf(":");
2168          if ((colonPos < 0) ||
2169              (! line.substring(0, colonPos).equalsIgnoreCase("changetype")))
2170          {
2171            throw new LDIFException(
2172                 ERR_READ_CR_CT_LINE_DOESNT_START_WITH_CT.get(firstLineNumber),
2173                 firstLineNumber, true, ldifLines, null);
2174          }
2175    
2176          length = line.length();
2177          if (length == (colonPos+1))
2178          {
2179            // The colon was the last character on the line.  This is not
2180            // acceptable.
2181            throw new LDIFException(
2182                 ERR_READ_CT_LINE_NO_CT_VALUE.get(firstLineNumber), firstLineNumber,
2183                 true, ldifLines, null);
2184          }
2185          else if (line.charAt(colonPos+1) == ':')
2186          {
2187            // Skip over any spaces leading up to the value, and then the rest of
2188            // the string is the base64-encoded changetype.  This is unusual and
2189            // unnecessary, but is nevertheless acceptable.
2190            int pos = colonPos+2;
2191            while ((pos < length) && (line.charAt(pos) == ' '))
2192            {
2193              pos++;
2194            }
2195    
2196            try
2197            {
2198              final byte[] changeTypeBytes = Base64.decode(line.substring(pos));
2199              changeType = new String(changeTypeBytes, "UTF-8");
2200            }
2201            catch (final ParseException pe)
2202            {
2203              debugException(pe);
2204              throw new LDIFException(
2205                             ERR_READ_CANNOT_BASE64_DECODE_CT.get(firstLineNumber,
2206                                                                  pe.getMessage()),
2207                             firstLineNumber, true, ldifLines, pe);
2208            }
2209            catch (final Exception e)
2210            {
2211              debugException(e);
2212              throw new LDIFException(
2213                   ERR_READ_CANNOT_BASE64_DECODE_CT.get(firstLineNumber, e),
2214                   firstLineNumber, true, ldifLines, e);
2215            }
2216          }
2217          else
2218          {
2219            // Skip over any spaces leading up to the value, and then the rest of
2220            // the string is the changetype.
2221            int pos = colonPos+1;
2222            while ((pos < length) && (line.charAt(pos) == ' '))
2223            {
2224              pos++;
2225            }
2226    
2227            changeType = line.substring(pos);
2228          }
2229        }
2230    
2231    
2232        // Make sure that the change type is acceptable and then decode the rest of
2233        // the change record accordingly.
2234        final String lowerChangeType = toLowerCase(changeType);
2235        if (lowerChangeType.equals("add"))
2236        {
2237          // There must be at least one more line.  If not, then that's an error.
2238          // Otherwise, parse the rest of the data as attribute-value pairs.
2239          if (iterator.hasNext())
2240          {
2241            final Collection<Attribute> attrs =
2242                 parseAttributes(dn, unparsedRecord.getDuplicateValueBehavior(),
2243                      unparsedRecord.getTrailingSpaceBehavior(),
2244                      unparsedRecord.getSchema(), ldifLines, iterator,
2245                      relativeBasePath, firstLineNumber);
2246            final Attribute[] attributes = new Attribute[attrs.size()];
2247            final Iterator<Attribute> attrIterator = attrs.iterator();
2248            for (int i=0; i < attributes.length; i++)
2249            {
2250              attributes[i] = attrIterator.next();
2251            }
2252    
2253            return new LDIFAddChangeRecord(dn, attributes);
2254          }
2255          else
2256          {
2257            throw new LDIFException(ERR_READ_CR_NO_ATTRIBUTES.get(firstLineNumber),
2258                                    firstLineNumber, true, ldifLines, null);
2259          }
2260        }
2261        else if (lowerChangeType.equals("delete"))
2262        {
2263          // There shouldn't be any more data.  If there is, then that's an error.
2264          // Otherwise, we can just return the delete change record with what we
2265          // already know.
2266          if (iterator.hasNext())
2267          {
2268            throw new LDIFException(
2269                           ERR_READ_CR_EXTRA_DELETE_DATA.get(firstLineNumber),
2270                           firstLineNumber, true, ldifLines, null);
2271          }
2272          else
2273          {
2274            return new LDIFDeleteChangeRecord(dn);
2275          }
2276        }
2277        else if (lowerChangeType.equals("modify"))
2278        {
2279          // There must be at least one more line.  If not, then that's an error.
2280          // Otherwise, parse the rest of the data as a set of modifications.
2281          if (iterator.hasNext())
2282          {
2283            final Modification[] mods = parseModifications(dn,
2284                 unparsedRecord.getTrailingSpaceBehavior(), ldifLines, iterator,
2285                 firstLineNumber);
2286            return new LDIFModifyChangeRecord(dn, mods);
2287          }
2288          else
2289          {
2290            throw new LDIFException(ERR_READ_CR_NO_MODS.get(firstLineNumber),
2291                                    firstLineNumber, true, ldifLines, null);
2292          }
2293        }
2294        else if (lowerChangeType.equals("moddn") ||
2295                 lowerChangeType.equals("modrdn"))
2296        {
2297          // There must be at least one more line.  If not, then that's an error.
2298          // Otherwise, parse the rest of the data as a set of modifications.
2299          if (iterator.hasNext())
2300          {
2301            return parseModifyDNChangeRecord(ldifLines, iterator, dn,
2302                 unparsedRecord.getTrailingSpaceBehavior(), firstLineNumber);
2303          }
2304          else
2305          {
2306            throw new LDIFException(ERR_READ_CR_NO_NEWRDN.get(firstLineNumber),
2307                                    firstLineNumber, true, ldifLines, null);
2308          }
2309        }
2310        else
2311        {
2312          throw new LDIFException(ERR_READ_CR_INVALID_CT.get(changeType,
2313                                                             firstLineNumber),
2314                                  firstLineNumber, true, ldifLines, null);
2315        }
2316      }
2317    
2318    
2319    
2320      /**
2321       * Parses the data available through the provided iterator as a collection of
2322       * attributes suitable for use in an entry or an add change record.
2323       *
2324       * @param  dn                      The DN of the record being read.
2325       * @param  duplicateValueBehavior  The behavior that should be exhibited if
2326       *                                 the LDIF reader encounters an entry with
2327       *                                 duplicate values.
2328       * @param  trailingSpaceBehavior   The behavior that should be exhibited when
2329       *                                 encountering attribute values which are not
2330       *                                 base64-encoded but contain trailing spaces.
2331       * @param  schema                  The schema to use when parsing the
2332       *                                 attributes, or {@code null} if none is
2333       *                                 needed.
2334       * @param  ldifLines               The lines that comprise the LDIF
2335       *                                 representation of the full record being
2336       *                                 parsed.
2337       * @param  iterator                The iterator to use to access the attribute
2338       *                                 lines.
2339       * @param  relativeBasePath        The base path that will be prepended to
2340       *                                 relative paths in order to obtain an
2341       *                                 absolute path.
2342       * @param  firstLineNumber         The line number for the start of the
2343       *                                 record.
2344       *
2345       * @return  The collection of attributes that were read.
2346       *
2347       * @throws  LDIFException  If the provided LDIF data cannot be decoded as a
2348       *                         set of attributes.
2349       */
2350      private static ArrayList<Attribute> parseAttributes(final String dn,
2351           final DuplicateValueBehavior duplicateValueBehavior,
2352           final TrailingSpaceBehavior trailingSpaceBehavior, final Schema schema,
2353           final ArrayList<StringBuilder> ldifLines,
2354           final Iterator<StringBuilder> iterator, final String relativeBasePath,
2355           final long firstLineNumber)
2356              throws LDIFException
2357      {
2358        final LinkedHashMap<String,Object> attributes =
2359             new LinkedHashMap<String,Object>(ldifLines.size());
2360        while (iterator.hasNext())
2361        {
2362          final StringBuilder line = iterator.next();
2363          handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior);
2364          final int colonPos = line.indexOf(":");
2365          if (colonPos <= 0)
2366          {
2367            throw new LDIFException(ERR_READ_NO_ATTR_COLON.get(firstLineNumber),
2368                                    firstLineNumber, true, ldifLines, null);
2369          }
2370    
2371          final String attributeName = line.substring(0, colonPos);
2372          final String lowerName     = toLowerCase(attributeName);
2373    
2374          final MatchingRule matchingRule;
2375          if (schema == null)
2376          {
2377            matchingRule = CaseIgnoreStringMatchingRule.getInstance();
2378          }
2379          else
2380          {
2381            matchingRule =
2382                 MatchingRule.selectEqualityMatchingRule(attributeName, schema);
2383          }
2384    
2385          Attribute attr;
2386          final LDIFAttribute ldifAttr;
2387          final Object attrObject = attributes.get(lowerName);
2388          if (attrObject == null)
2389          {
2390            attr     = null;
2391            ldifAttr = null;
2392          }
2393          else
2394          {
2395            if (attrObject instanceof Attribute)
2396            {
2397              attr     = (Attribute) attrObject;
2398              ldifAttr = new LDIFAttribute(attr.getName(), matchingRule,
2399                                           attr.getRawValues()[0]);
2400              attributes.put(lowerName, ldifAttr);
2401            }
2402            else
2403            {
2404              attr     = null;
2405              ldifAttr = (LDIFAttribute) attrObject;
2406            }
2407          }
2408    
2409          final int length = line.length();
2410          if (length == (colonPos+1))
2411          {
2412            // This means that the attribute has a zero-length value, which is
2413            // acceptable.
2414            if (attrObject == null)
2415            {
2416              attr = new Attribute(attributeName, "");
2417              attributes.put(lowerName, attr);
2418            }
2419            else
2420            {
2421              try
2422              {
2423                if (! ldifAttr.addValue(new ASN1OctetString(),
2424                           duplicateValueBehavior))
2425                {
2426                  if (duplicateValueBehavior != DuplicateValueBehavior.STRIP)
2427                  {
2428                    throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn,
2429                         firstLineNumber, attributeName), firstLineNumber, true,
2430                         ldifLines, null);
2431                  }
2432                }
2433              }
2434              catch (LDAPException le)
2435              {
2436                throw new LDIFException(ERR_READ_VALUE_SYNTAX_VIOLATION.get(dn,
2437                     firstLineNumber, attributeName, getExceptionMessage(le)),
2438                     firstLineNumber, true, ldifLines, le);
2439              }
2440            }
2441          }
2442          else if (line.charAt(colonPos+1) == ':')
2443          {
2444            // Skip over any spaces leading up to the value, and then the rest of
2445            // the string is the base64-encoded attribute value.
2446            int pos = colonPos+2;
2447            while ((pos < length) && (line.charAt(pos) == ' '))
2448            {
2449              pos++;
2450            }
2451    
2452            try
2453            {
2454              final byte[] valueBytes = Base64.decode(line.substring(pos));
2455              if (attrObject == null)
2456              {
2457                attr = new Attribute(attributeName, valueBytes);
2458                attributes.put(lowerName, attr);
2459              }
2460              else
2461              {
2462                try
2463                {
2464                  if (! ldifAttr.addValue(new ASN1OctetString(valueBytes),
2465                             duplicateValueBehavior))
2466                  {
2467                    if (duplicateValueBehavior != DuplicateValueBehavior.STRIP)
2468                    {
2469                      throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn,
2470                           firstLineNumber, attributeName), firstLineNumber, true,
2471                           ldifLines, null);
2472                    }
2473                  }
2474                }
2475                catch (LDAPException le)
2476                {
2477                  throw new LDIFException(ERR_READ_VALUE_SYNTAX_VIOLATION.get(dn,
2478                       firstLineNumber, attributeName, getExceptionMessage(le)),
2479                       firstLineNumber, true, ldifLines, le);
2480                }
2481              }
2482            }
2483            catch (final ParseException pe)
2484            {
2485              debugException(pe);
2486              throw new LDIFException(ERR_READ_CANNOT_BASE64_DECODE_ATTR.get(
2487                                           attributeName,  firstLineNumber,
2488                                           pe.getMessage()),
2489                                      firstLineNumber, true, ldifLines, pe);
2490            }
2491          }
2492          else if (line.charAt(colonPos+1) == '<')
2493          {
2494            // Skip over any spaces leading up to the value, and then the rest of
2495            // the string is a URL that indicates where to get the real content.
2496            // At the present time, we'll only support the file URLs.
2497            int pos = colonPos+2;
2498            while ((pos < length) && (line.charAt(pos) == ' '))
2499            {
2500              pos++;
2501            }
2502    
2503            final String path;
2504            final String urlString = line.substring(pos);
2505            final String lowerURLString = toLowerCase(urlString);
2506            if (lowerURLString.startsWith("file:/"))
2507            {
2508              pos = 6;
2509              while ((pos < urlString.length()) && (urlString.charAt(pos) == '/'))
2510              {
2511                pos++;
2512              }
2513    
2514              path = urlString.substring(pos-1);
2515            }
2516            else if (lowerURLString.startsWith("file:"))
2517            {
2518              // A file: URL that doesn't include a slash will be interpreted as a
2519              // relative path.
2520              path = relativeBasePath + urlString.substring(5);
2521            }
2522            else
2523            {
2524              throw new LDIFException(ERR_READ_URL_INVALID_SCHEME.get(attributeName,
2525                                           urlString, firstLineNumber),
2526                                      firstLineNumber, true, ldifLines, null);
2527            }
2528    
2529            try
2530            {
2531              final File f = new File(path);
2532              if (! f.exists())
2533              {
2534                throw new LDIFException(ERR_READ_URL_NO_SUCH_FILE.get(attributeName,
2535                                             urlString, firstLineNumber,
2536                                             f.getAbsolutePath()),
2537                                        firstLineNumber, true, ldifLines, null);
2538              }
2539    
2540              // In order to conserve memory, we'll only allow values to be read
2541              // from files no larger than 10 megabytes.
2542              final long fileSize = f.length();
2543              if (fileSize > (10 * 1024 * 1024))
2544              {
2545                throw new LDIFException(ERR_READ_URL_FILE_TOO_LARGE.get(
2546                                             attributeName, urlString,
2547                                             firstLineNumber, f.getAbsolutePath(),
2548                                             (10*1024*1024)),
2549                                        firstLineNumber, true, ldifLines, null);
2550              }
2551    
2552              int fileBytesRead              = 0;
2553              int fileBytesRemaining         = (int) fileSize;
2554              final byte[]          fileData = new byte[(int) fileSize];
2555              final FileInputStream fis      = new FileInputStream(f);
2556              try
2557              {
2558                while (fileBytesRead < fileSize)
2559                {
2560                  final int bytesRead =
2561                       fis.read(fileData, fileBytesRead, fileBytesRemaining);
2562                  if (bytesRead < 0)
2563                  {
2564                    // We hit the end of the file before we expected to.  This
2565                    // shouldn't happen unless the file size changed since we first
2566                    // looked at it, which we won't allow.
2567                    throw new LDIFException(ERR_READ_URL_FILE_SIZE_CHANGED.get(
2568                                                 attributeName, urlString,
2569                                                 firstLineNumber,
2570                                                 f.getAbsolutePath()),
2571                                            firstLineNumber, true, ldifLines, null);
2572                  }
2573    
2574                  fileBytesRead      += bytesRead;
2575                  fileBytesRemaining -= bytesRead;
2576                }
2577    
2578                if (fis.read() != -1)
2579                {
2580                  // There is still more data to read.  This shouldn't happen unless
2581                  // the file size changed since we first looked at it, which we
2582                  // won't allow.
2583                  throw new LDIFException(ERR_READ_URL_FILE_SIZE_CHANGED.get(
2584                                               attributeName, urlString,
2585                                               firstLineNumber,
2586                                               f.getAbsolutePath()),
2587                                          firstLineNumber, true, ldifLines, null);
2588                }
2589              }
2590              finally
2591              {
2592                fis.close();
2593              }
2594    
2595              if (attrObject == null)
2596              {
2597                attr = new Attribute(attributeName, fileData);
2598                attributes.put(lowerName, attr);
2599              }
2600              else
2601              {
2602                if (! ldifAttr.addValue(new ASN1OctetString(fileData),
2603                           duplicateValueBehavior))
2604                {
2605                  if (duplicateValueBehavior != DuplicateValueBehavior.STRIP)
2606                  {
2607                    throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn,
2608                         firstLineNumber, attributeName), firstLineNumber, true,
2609                         ldifLines, null);
2610                  }
2611                }
2612              }
2613            }
2614            catch (LDIFException le)
2615            {
2616              debugException(le);
2617              throw le;
2618            }
2619            catch (Exception e)
2620            {
2621              debugException(e);
2622              throw new LDIFException(ERR_READ_URL_EXCEPTION.get(attributeName,
2623                                           urlString, firstLineNumber, e),
2624                                      firstLineNumber, true, ldifLines, e);
2625            }
2626          }
2627          else
2628          {
2629            // Skip over any spaces leading up to the value, and then the rest of
2630            // the string is the value.
2631            int pos = colonPos+1;
2632            while ((pos < length) && (line.charAt(pos) == ' '))
2633            {
2634              pos++;
2635            }
2636    
2637            final String valueString = line.substring(pos);
2638            if (attrObject == null)
2639            {
2640              attr = new Attribute(attributeName, valueString);
2641              attributes.put(lowerName, attr);
2642            }
2643            else
2644            {
2645              try
2646              {
2647                if (! ldifAttr.addValue(new ASN1OctetString(valueString),
2648                           duplicateValueBehavior))
2649                {
2650                  if (duplicateValueBehavior != DuplicateValueBehavior.STRIP)
2651                  {
2652                    throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn,
2653                         firstLineNumber, attributeName), firstLineNumber, true,
2654                         ldifLines, null);
2655                  }
2656                }
2657              }
2658              catch (LDAPException le)
2659              {
2660                throw new LDIFException(ERR_READ_VALUE_SYNTAX_VIOLATION.get(dn,
2661                     firstLineNumber, attributeName, getExceptionMessage(le)),
2662                     firstLineNumber, true, ldifLines, le);
2663              }
2664            }
2665          }
2666        }
2667    
2668        final ArrayList<Attribute> attrList =
2669             new ArrayList<Attribute>(attributes.size());
2670        for (final Object o : attributes.values())
2671        {
2672          if (o instanceof Attribute)
2673          {
2674            attrList.add((Attribute) o);
2675          }
2676          else
2677          {
2678            attrList.add(((LDIFAttribute) o).toAttribute());
2679          }
2680        }
2681    
2682        return attrList;
2683      }
2684    
2685    
2686    
2687      /**
2688       * Parses the data available through the provided iterator into an array of
2689       * modifications suitable for use in a modify change record.
2690       *
2691       * @param  dn                     The DN of the entry being parsed.
2692       * @param  trailingSpaceBehavior  The behavior that should be exhibited when
2693       *                                encountering attribute values which are not
2694       *                                base64-encoded but contain trailing spaces.
2695       * @param  ldifLines              The lines that comprise the LDIF
2696       *                                representation of the full record being
2697       *                                parsed.
2698       * @param  iterator               The iterator to use to access the
2699       *                                modification data.
2700       * @param  firstLineNumber        The line number for the start of the record.
2701       *
2702       * @return  An array containing the modifications that were read.
2703       *
2704       * @throws  LDIFException  If the provided LDIF data cannot be decoded as a
2705       *                         set of modifications.
2706       */
2707      private static Modification[] parseModifications(final String dn,
2708           final TrailingSpaceBehavior trailingSpaceBehavior,
2709           final ArrayList<StringBuilder> ldifLines,
2710           final Iterator<StringBuilder> iterator, final long firstLineNumber)
2711           throws LDIFException
2712      {
2713        final ArrayList<Modification> modList =
2714             new ArrayList<Modification>(ldifLines.size());
2715    
2716        while (iterator.hasNext())
2717        {
2718          // The first line must start with "add:", "delete:", "replace:", or
2719          // "increment:" followed by an attribute name.
2720          StringBuilder line = iterator.next();
2721          handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior);
2722          int colonPos = line.indexOf(":");
2723          if (colonPos < 0)
2724          {
2725            throw new LDIFException(ERR_READ_MOD_CR_NO_MODTYPE.get(firstLineNumber),
2726                                    firstLineNumber, true, ldifLines, null);
2727          }
2728    
2729          final ModificationType modType;
2730          final String modTypeStr = toLowerCase(line.substring(0, colonPos));
2731          if (modTypeStr.equals("add"))
2732          {
2733            modType = ModificationType.ADD;
2734          }
2735          else if (modTypeStr.equals("delete"))
2736          {
2737            modType = ModificationType.DELETE;
2738          }
2739          else if (modTypeStr.equals("replace"))
2740          {
2741            modType = ModificationType.REPLACE;
2742          }
2743          else if (modTypeStr.equals("increment"))
2744          {
2745            modType = ModificationType.INCREMENT;
2746          }
2747          else
2748          {
2749            throw new LDIFException(ERR_READ_MOD_CR_INVALID_MODTYPE.get(modTypeStr,
2750                                         firstLineNumber),
2751                                    firstLineNumber, true, ldifLines, null);
2752          }
2753    
2754          final String attributeName;
2755          int length = line.length();
2756          if (length == (colonPos+1))
2757          {
2758            // The colon was the last character on the line.  This is not
2759            // acceptable.
2760            throw new LDIFException(ERR_READ_MOD_CR_MODTYPE_NO_ATTR.get(
2761                                         firstLineNumber),
2762                                    firstLineNumber, true, ldifLines, null);
2763          }
2764          else if (line.charAt(colonPos+1) == ':')
2765          {
2766            // Skip over any spaces leading up to the value, and then the rest of
2767            // the string is the base64-encoded attribute name.
2768            int pos = colonPos+2;
2769            while ((pos < length) && (line.charAt(pos) == ' '))
2770            {
2771              pos++;
2772            }
2773    
2774            try
2775            {
2776              final byte[] dnBytes = Base64.decode(line.substring(pos));
2777              attributeName = new String(dnBytes, "UTF-8");
2778            }
2779            catch (final ParseException pe)
2780            {
2781              debugException(pe);
2782              throw new LDIFException(
2783                   ERR_READ_MOD_CR_MODTYPE_CANNOT_BASE64_DECODE_ATTR.get(
2784                        firstLineNumber, pe.getMessage()),
2785                   firstLineNumber, true, ldifLines, pe);
2786            }
2787            catch (final Exception e)
2788            {
2789              debugException(e);
2790              throw new LDIFException(
2791                   ERR_READ_MOD_CR_MODTYPE_CANNOT_BASE64_DECODE_ATTR.get(
2792                        firstLineNumber, e),
2793                   firstLineNumber, true, ldifLines, e);
2794            }
2795          }
2796          else
2797          {
2798            // Skip over any spaces leading up to the value, and then the rest of
2799            // the string is the attribute name.
2800            int pos = colonPos+1;
2801            while ((pos < length) && (line.charAt(pos) == ' '))
2802            {
2803              pos++;
2804            }
2805    
2806            attributeName = line.substring(pos);
2807          }
2808    
2809          if (attributeName.length() == 0)
2810          {
2811            throw new LDIFException(ERR_READ_MOD_CR_MODTYPE_NO_ATTR.get(
2812                                         firstLineNumber),
2813                                    firstLineNumber, true, ldifLines, null);
2814          }
2815    
2816    
2817          // The next zero or more lines may be the set of attribute values.  Keep
2818          // reading until we reach the end of the iterator or until we find a line
2819          // with just a "-".
2820          final ArrayList<ASN1OctetString> valueList =
2821               new ArrayList<ASN1OctetString>(ldifLines.size());
2822          while (iterator.hasNext())
2823          {
2824            line = iterator.next();
2825            handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior);
2826            if (line.toString().equals("-"))
2827            {
2828              break;
2829            }
2830    
2831            colonPos = line.indexOf(":");
2832            if (colonPos < 0)
2833            {
2834              throw new LDIFException(ERR_READ_NO_ATTR_COLON.get(firstLineNumber),
2835                                      firstLineNumber, true, ldifLines, null);
2836            }
2837            else if (! line.substring(0, colonPos).equalsIgnoreCase(attributeName))
2838            {
2839              throw new LDIFException(ERR_READ_MOD_CR_ATTR_MISMATCH.get(
2840                                           firstLineNumber,
2841                                           line.substring(0, colonPos),
2842                                           attributeName),
2843                                      firstLineNumber, true, ldifLines, null);
2844            }
2845    
2846            final ASN1OctetString value;
2847            length = line.length();
2848            if (length == (colonPos+1))
2849            {
2850              // The colon was the last character on the line.  This is fine.
2851              value = new ASN1OctetString();
2852            }
2853            else if (line.charAt(colonPos+1) == ':')
2854            {
2855              // Skip over any spaces leading up to the value, and then the rest of
2856              // the string is the base64-encoded value.  This is unusual and
2857              // unnecessary, but is nevertheless acceptable.
2858              int pos = colonPos+2;
2859              while ((pos < length) && (line.charAt(pos) == ' '))
2860              {
2861                pos++;
2862              }
2863    
2864              try
2865              {
2866                value = new ASN1OctetString(Base64.decode(line.substring(pos)));
2867              }
2868              catch (final ParseException pe)
2869              {
2870                debugException(pe);
2871                throw new LDIFException(ERR_READ_CANNOT_BASE64_DECODE_ATTR.get(
2872                     attributeName, firstLineNumber, pe.getMessage()),
2873                     firstLineNumber, true, ldifLines, pe);
2874              }
2875              catch (final Exception e)
2876              {
2877                debugException(e);
2878                throw new LDIFException(ERR_READ_CANNOT_BASE64_DECODE_ATTR.get(
2879                                             firstLineNumber, e),
2880                                        firstLineNumber, true, ldifLines, e);
2881              }
2882            }
2883            else
2884            {
2885              // Skip over any spaces leading up to the value, and then the rest of
2886              // the string is the value.
2887              int pos = colonPos+1;
2888              while ((pos < length) && (line.charAt(pos) == ' '))
2889              {
2890                pos++;
2891              }
2892    
2893              value = new ASN1OctetString(line.substring(pos));
2894            }
2895    
2896            valueList.add(value);
2897          }
2898    
2899          final ASN1OctetString[] values = new ASN1OctetString[valueList.size()];
2900          valueList.toArray(values);
2901    
2902          // If it's an add modification type, then there must be at least one
2903          // value.
2904          if ((modType.intValue() == ModificationType.ADD.intValue()) &&
2905              (values.length == 0))
2906          {
2907            throw new LDIFException(ERR_READ_MOD_CR_NO_ADD_VALUES.get(attributeName,
2908                                         firstLineNumber),
2909                                    firstLineNumber, true, ldifLines, null);
2910          }
2911    
2912          // If it's an increment modification type, then there must be exactly one
2913          // value.
2914          if ((modType.intValue() == ModificationType.INCREMENT.intValue()) &&
2915              (values.length != 1))
2916          {
2917            throw new LDIFException(ERR_READ_MOD_CR_INVALID_INCR_VALUE_COUNT.get(
2918                                         firstLineNumber, attributeName),
2919                                    firstLineNumber, true, ldifLines, null);
2920          }
2921    
2922          modList.add(new Modification(modType, attributeName, values));
2923        }
2924    
2925        final Modification[] mods = new Modification[modList.size()];
2926        modList.toArray(mods);
2927        return mods;
2928      }
2929    
2930    
2931    
2932      /**
2933       * Parses the data available through the provided iterator as the body of a
2934       * modify DN change record (i.e., the newrdn, deleteoldrdn, and optional
2935       * newsuperior lines).
2936       *
2937       * @param  ldifLines              The lines that comprise the LDIF
2938       *                                representation of the full record being
2939       *                                parsed.
2940       * @param  iterator               The iterator to use to access the modify DN
2941       *                                data.
2942       * @param  dn                     The current DN of the entry.
2943       * @param  trailingSpaceBehavior  The behavior that should be exhibited when
2944       *                                encountering attribute values which are not
2945       *                                base64-encoded but contain trailing spaces.
2946       * @param  firstLineNumber        The line number for the start of the record.
2947       *
2948       * @return  The decoded modify DN change record.
2949       *
2950       * @throws  LDIFException  If the provided LDIF data cannot be decoded as a
2951       *                         modify DN change record.
2952       */
2953      private static LDIFModifyDNChangeRecord parseModifyDNChangeRecord(
2954           final ArrayList<StringBuilder> ldifLines,
2955           final Iterator<StringBuilder> iterator, final String dn,
2956           final TrailingSpaceBehavior trailingSpaceBehavior,
2957           final long firstLineNumber)
2958           throws LDIFException
2959      {
2960        // The next line must be the new RDN, and it must start with "newrdn:".
2961        StringBuilder line = iterator.next();
2962        handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior);
2963        int colonPos = line.indexOf(":");
2964        if ((colonPos < 0) ||
2965            (! line.substring(0, colonPos).equalsIgnoreCase("newrdn")))
2966        {
2967          throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWRDN_COLON.get(
2968                                       firstLineNumber),
2969                                  firstLineNumber, true, ldifLines, null);
2970        }
2971    
2972        final String newRDN;
2973        int length = line.length();
2974        if (length == (colonPos+1))
2975        {
2976          // The colon was the last character on the line.  This is not acceptable.
2977          throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWRDN_VALUE.get(
2978                                       firstLineNumber),
2979                                  firstLineNumber, true, ldifLines, null);
2980        }
2981        else if (line.charAt(colonPos+1) == ':')
2982        {
2983          // Skip over any spaces leading up to the value, and then the rest of the
2984          // string is the base64-encoded new RDN.
2985          int pos = colonPos+2;
2986          while ((pos < length) && (line.charAt(pos) == ' '))
2987          {
2988            pos++;
2989          }
2990    
2991          try
2992          {
2993            final byte[] dnBytes = Base64.decode(line.substring(pos));
2994            newRDN = new String(dnBytes, "UTF-8");
2995          }
2996          catch (final ParseException pe)
2997          {
2998            debugException(pe);
2999            throw new LDIFException(
3000                 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWRDN.get(firstLineNumber,
3001                                                                   pe.getMessage()),
3002                 firstLineNumber, true, ldifLines, pe);
3003          }
3004          catch (final Exception e)
3005          {
3006            debugException(e);
3007            throw new LDIFException(
3008                 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWRDN.get(firstLineNumber,
3009                                                                   e),
3010                 firstLineNumber, true, ldifLines, e);
3011          }
3012        }
3013        else
3014        {
3015          // Skip over any spaces leading up to the value, and then the rest of the
3016          // string is the new RDN.
3017          int pos = colonPos+1;
3018          while ((pos < length) && (line.charAt(pos) == ' '))
3019          {
3020            pos++;
3021          }
3022    
3023          newRDN = line.substring(pos);
3024        }
3025    
3026        if (newRDN.length() == 0)
3027        {
3028          throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWRDN_VALUE.get(
3029                                       firstLineNumber),
3030                                  firstLineNumber, true, ldifLines, null);
3031        }
3032    
3033    
3034        // The next line must be the deleteOldRDN flag, and it must start with
3035        // 'deleteoldrdn:'.
3036        if (! iterator.hasNext())
3037        {
3038          throw new LDIFException(ERR_READ_MODDN_CR_NO_DELOLDRDN_COLON.get(
3039                                       firstLineNumber),
3040                                  firstLineNumber, true, ldifLines, null);
3041        }
3042    
3043        line = iterator.next();
3044        handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior);
3045        colonPos = line.indexOf(":");
3046        if ((colonPos < 0) ||
3047            (! line.substring(0, colonPos).equalsIgnoreCase("deleteoldrdn")))
3048        {
3049          throw new LDIFException(ERR_READ_MODDN_CR_NO_DELOLDRDN_COLON.get(
3050                                       firstLineNumber),
3051                                  firstLineNumber, true, ldifLines, null);
3052        }
3053    
3054        final String deleteOldRDNStr;
3055        length = line.length();
3056        if (length == (colonPos+1))
3057        {
3058          // The colon was the last character on the line.  This is not acceptable.
3059          throw new LDIFException(ERR_READ_MODDN_CR_NO_DELOLDRDN_VALUE.get(
3060                                       firstLineNumber),
3061                                  firstLineNumber, true, ldifLines, null);
3062        }
3063        else if (line.charAt(colonPos+1) == ':')
3064        {
3065          // Skip over any spaces leading up to the value, and then the rest of the
3066          // string is the base64-encoded value.  This is unusual and
3067          // unnecessary, but is nevertheless acceptable.
3068          int pos = colonPos+2;
3069          while ((pos < length) && (line.charAt(pos) == ' '))
3070          {
3071            pos++;
3072          }
3073    
3074          try
3075          {
3076            final byte[] changeTypeBytes = Base64.decode(line.substring(pos));
3077            deleteOldRDNStr = new String(changeTypeBytes, "UTF-8");
3078          }
3079          catch (final ParseException pe)
3080          {
3081            debugException(pe);
3082            throw new LDIFException(
3083                 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_DELOLDRDN.get(
3084                      firstLineNumber, pe.getMessage()),
3085                 firstLineNumber, true, ldifLines, pe);
3086          }
3087          catch (final Exception e)
3088          {
3089            debugException(e);
3090            throw new LDIFException(
3091                 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_DELOLDRDN.get(
3092                      firstLineNumber, e),
3093                 firstLineNumber, true, ldifLines, e);
3094          }
3095        }
3096        else
3097        {
3098          // Skip over any spaces leading up to the value, and then the rest of the
3099          // string is the value.
3100          int pos = colonPos+1;
3101          while ((pos < length) && (line.charAt(pos) == ' '))
3102          {
3103            pos++;
3104          }
3105    
3106          deleteOldRDNStr = line.substring(pos);
3107        }
3108    
3109        final boolean deleteOldRDN;
3110        if (deleteOldRDNStr.equals("0"))
3111        {
3112          deleteOldRDN = false;
3113        }
3114        else if (deleteOldRDNStr.equals("1"))
3115        {
3116          deleteOldRDN = true;
3117        }
3118        else if (deleteOldRDNStr.equalsIgnoreCase("false") ||
3119                 deleteOldRDNStr.equalsIgnoreCase("no"))
3120        {
3121          // This is technically illegal, but we'll allow it.
3122          deleteOldRDN = false;
3123        }
3124        else if (deleteOldRDNStr.equalsIgnoreCase("true") ||
3125                 deleteOldRDNStr.equalsIgnoreCase("yes"))
3126        {
3127          // This is also technically illegal, but we'll allow it.
3128          deleteOldRDN = false;
3129        }
3130        else
3131        {
3132          throw new LDIFException(ERR_READ_MODDN_CR_INVALID_DELOLDRDN.get(
3133                                       deleteOldRDNStr, firstLineNumber),
3134                                  firstLineNumber, true, ldifLines, null);
3135        }
3136    
3137    
3138        // If there is another line, then it must be the new superior DN and it must
3139        // start with "newsuperior:".  If this is absent, then it's fine.
3140        final String newSuperiorDN;
3141        if (iterator.hasNext())
3142        {
3143          line = iterator.next();
3144          handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior);
3145          colonPos = line.indexOf(":");
3146          if ((colonPos < 0) ||
3147              (! line.substring(0, colonPos).equalsIgnoreCase("newsuperior")))
3148          {
3149            throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWSUPERIOR_COLON.get(
3150                                         firstLineNumber),
3151                                    firstLineNumber, true, ldifLines, null);
3152          }
3153    
3154          length = line.length();
3155          if (length == (colonPos+1))
3156          {
3157            // The colon was the last character on the line.  This is fine.
3158            newSuperiorDN = "";
3159          }
3160          else if (line.charAt(colonPos+1) == ':')
3161          {
3162            // Skip over any spaces leading up to the value, and then the rest of
3163            // the string is the base64-encoded new superior DN.
3164            int pos = colonPos+2;
3165            while ((pos < length) && (line.charAt(pos) == ' '))
3166            {
3167              pos++;
3168            }
3169    
3170            try
3171            {
3172              final byte[] dnBytes = Base64.decode(line.substring(pos));
3173              newSuperiorDN = new String(dnBytes, "UTF-8");
3174            }
3175            catch (final ParseException pe)
3176            {
3177              debugException(pe);
3178              throw new LDIFException(
3179                   ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWSUPERIOR.get(
3180                        firstLineNumber, pe.getMessage()),
3181                   firstLineNumber, true, ldifLines, pe);
3182            }
3183            catch (final Exception e)
3184            {
3185              debugException(e);
3186              throw new LDIFException(
3187                   ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWSUPERIOR.get(
3188                        firstLineNumber, e),
3189                   firstLineNumber, true, ldifLines, e);
3190            }
3191          }
3192          else
3193          {
3194            // Skip over any spaces leading up to the value, and then the rest of
3195            // the string is the new superior DN.
3196            int pos = colonPos+1;
3197            while ((pos < length) && (line.charAt(pos) == ' '))
3198            {
3199              pos++;
3200            }
3201    
3202            newSuperiorDN = line.substring(pos);
3203          }
3204        }
3205        else
3206        {
3207          newSuperiorDN = null;
3208        }
3209    
3210    
3211        // There must not be any more lines.
3212        if (iterator.hasNext())
3213        {
3214          throw new LDIFException(ERR_READ_CR_EXTRA_MODDN_DATA.get(firstLineNumber),
3215                                  firstLineNumber, true, ldifLines, null);
3216        }
3217    
3218        return new LDIFModifyDNChangeRecord(dn, newRDN, deleteOldRDN,
3219                                            newSuperiorDN);
3220      }
3221    
3222    
3223    
3224      /**
3225       * Examines the line contained in the provided buffer to determine whether it
3226       * may contain one or more illegal trailing spaces.  If it does, then those
3227       * spaces will either be stripped out or an exception will be thrown to
3228       * indicate that they are illegal.
3229       *
3230       * @param  buffer                 The buffer to be examined.
3231       * @param  dn                     The DN of the LDIF record being parsed.  It
3232       *                                may be {@code null} if the DN is not yet
3233       *                                known (e.g., because the provided line is
3234       *                                expected to contain that DN).
3235       * @param  firstLineNumber        The approximate line number in the LDIF
3236       *                                source on which the LDIF record begins.
3237       * @param  trailingSpaceBehavior  The behavior that should be exhibited when
3238       *                                encountering attribute values which are not
3239       *                                base64-encoded but contain trailing spaces.
3240       *
3241       * @throws  LDIFException  If the line contained in the provided buffer ends
3242       *                         with one or more illegal trailing spaces and
3243       *                         {@code stripTrailingSpaces} was provided with a
3244       *                         value of {@code false}.
3245       */
3246      private static void handleTrailingSpaces(final StringBuilder buffer,
3247                               final String dn, final long firstLineNumber,
3248                               final TrailingSpaceBehavior trailingSpaceBehavior)
3249              throws LDIFException
3250      {
3251        int pos = buffer.length() - 1;
3252        boolean trailingFound = false;
3253        while ((pos >= 0) && (buffer.charAt(pos) == ' '))
3254        {
3255          trailingFound = true;
3256          pos--;
3257        }
3258    
3259        if (trailingFound && (buffer.charAt(pos) != ':'))
3260        {
3261          switch (trailingSpaceBehavior)
3262          {
3263            case STRIP:
3264              buffer.setLength(pos+1);
3265              break;
3266    
3267            case REJECT:
3268              if (dn == null)
3269              {
3270                throw new LDIFException(
3271                     ERR_READ_ILLEGAL_TRAILING_SPACE_WITHOUT_DN.get(firstLineNumber,
3272                          buffer.toString()),
3273                     firstLineNumber, true);
3274              }
3275              else
3276              {
3277                throw new LDIFException(
3278                     ERR_READ_ILLEGAL_TRAILING_SPACE_WITH_DN.get(dn,
3279                          firstLineNumber, buffer.toString()),
3280                     firstLineNumber, true);
3281              }
3282    
3283            case RETAIN:
3284            default:
3285              // No action will be taken.
3286              break;
3287          }
3288        }
3289      }
3290    
3291    
3292    
3293      /**
3294       * This represents an unparsed LDIFRecord.  It stores the line number of the
3295       * first line of the record and each line of the record.
3296       */
3297      private static final class UnparsedLDIFRecord
3298      {
3299        private final ArrayList<StringBuilder> lineList;
3300        private final long firstLineNumber;
3301        private final Exception failureCause;
3302        private final boolean isEOF;
3303        private final DuplicateValueBehavior duplicateValueBehavior;
3304        private final Schema schema;
3305        private final TrailingSpaceBehavior trailingSpaceBehavior;
3306    
3307    
3308    
3309        /**
3310         * Constructor.
3311         *
3312         * @param  lineList                The lines that comprise the LDIF record.
3313         * @param  duplicateValueBehavior  The behavior to exhibit if the entry
3314         *                                 contains duplicate attribute values.
3315         * @param  trailingSpaceBehavior   Specifies the behavior to exhibit when
3316         *                                 encountering trailing spaces in
3317         *                                 non-base64-encoded attribute values.
3318         * @param  schema                  The schema to use when parsing, if
3319         *                                 applicable.
3320         * @param  firstLineNumber         The first line number of the LDIF record.
3321         */
3322        private UnparsedLDIFRecord(final ArrayList<StringBuilder> lineList,
3323                     final DuplicateValueBehavior duplicateValueBehavior,
3324                     final TrailingSpaceBehavior trailingSpaceBehavior,
3325                     final Schema schema, final long firstLineNumber)
3326        {
3327          this.lineList               = lineList;
3328          this.firstLineNumber        = firstLineNumber;
3329          this.duplicateValueBehavior = duplicateValueBehavior;
3330          this.trailingSpaceBehavior  = trailingSpaceBehavior;
3331          this.schema                 = schema;
3332    
3333          failureCause = null;
3334          isEOF =
3335               (firstLineNumber < 0) || ((lineList != null) && lineList.isEmpty());
3336        }
3337    
3338    
3339    
3340        /**
3341         * Constructor.
3342         *
3343         * @param failureCause  The Exception thrown when reading from the input.
3344         */
3345        private UnparsedLDIFRecord(final Exception failureCause)
3346        {
3347          this.failureCause = failureCause;
3348    
3349          lineList               = null;
3350          firstLineNumber        = 0;
3351          duplicateValueBehavior = DuplicateValueBehavior.REJECT;
3352          trailingSpaceBehavior  = TrailingSpaceBehavior.REJECT;
3353          schema                 = null;
3354          isEOF                  = false;
3355        }
3356    
3357    
3358    
3359        /**
3360         * Return the lines that comprise the LDIF record.
3361         *
3362         * @return  The lines that comprise the LDIF record.
3363         */
3364        private ArrayList<StringBuilder> getLineList()
3365        {
3366          return lineList;
3367        }
3368    
3369    
3370    
3371        /**
3372         * Retrieves the behavior to exhibit when encountering duplicate attribute
3373         * values.
3374         *
3375         * @return  The behavior to exhibit when encountering duplicate attribute
3376         *          values.
3377         */
3378        private DuplicateValueBehavior getDuplicateValueBehavior()
3379        {
3380          return duplicateValueBehavior;
3381        }
3382    
3383    
3384    
3385        /**
3386         * Retrieves the behavior that should be exhibited when encountering
3387         * attribute values which are not base64-encoded but contain trailing
3388         * spaces.  The LDIF specification strongly recommends that any value which
3389         * legitimately contains trailing spaces be base64-encoded, but the LDAP SDK
3390         * LDIF parser may be configured to automatically strip these spaces, to
3391         * preserve them, or to reject any entry or change record containing them.
3392         *
3393         * @return  The behavior that should be exhibited when encountering
3394         *          attribute values which are not base64-encoded but contain
3395         *          trailing spaces.
3396         */
3397        private TrailingSpaceBehavior getTrailingSpaceBehavior()
3398        {
3399          return trailingSpaceBehavior;
3400        }
3401    
3402    
3403    
3404        /**
3405         * Retrieves the schema that should be used when parsing the record, if
3406         * applicable.
3407         *
3408         * @return  The schema that should be used when parsing the record, or
3409         *          {@code null} if none should be used.
3410         */
3411        private Schema getSchema()
3412        {
3413          return schema;
3414        }
3415    
3416    
3417    
3418        /**
3419         * Return the first line number of the LDIF record.
3420         *
3421         * @return  The first line number of the LDIF record.
3422         */
3423        private long getFirstLineNumber()
3424        {
3425          return firstLineNumber;
3426        }
3427    
3428    
3429    
3430        /**
3431         * Return {@code true} iff the end of the input was reached.
3432         *
3433         * @return  {@code true} iff the end of the input was reached.
3434         */
3435        private boolean isEOF()
3436        {
3437          return isEOF;
3438        }
3439    
3440    
3441    
3442        /**
3443         * Returns the reason that reading the record lines failed.  This normally
3444         * is only non-null if something bad happened to the input stream (like
3445         * a disk read error).
3446         *
3447         * @return  The reason that reading the record lines failed.
3448         */
3449        private Exception getFailureCause()
3450        {
3451          return failureCause;
3452        }
3453      }
3454    
3455    
3456      /**
3457       * When processing in asynchronous mode, this thread is responsible for
3458       * reading the raw unparsed records from the input and submitting them for
3459       * processing.
3460       */
3461      private final class LineReaderThread
3462           extends Thread
3463      {
3464        /**
3465         * Constructor.
3466         */
3467        private LineReaderThread()
3468        {
3469          super("Asynchronous LDIF line reader");
3470          setDaemon(true);
3471        }
3472    
3473    
3474    
3475        /**
3476         * Reads raw, unparsed records from the input and submits them for
3477         * processing until the input is finished or closed.
3478         */
3479        @Override()
3480        public void run()
3481        {
3482          try
3483          {
3484            boolean stopProcessing = false;
3485            while (!stopProcessing)
3486            {
3487              UnparsedLDIFRecord unparsedRecord = null;
3488              try
3489              {
3490                unparsedRecord = readUnparsedRecord();
3491              }
3492              catch (IOException e)
3493              {
3494                debugException(e);
3495                unparsedRecord = new UnparsedLDIFRecord(e);
3496                stopProcessing = true;
3497              }
3498              catch (Exception e)
3499              {
3500                debugException(e);
3501                unparsedRecord = new UnparsedLDIFRecord(e);
3502              }
3503    
3504              try
3505              {
3506                asyncParser.submit(unparsedRecord);
3507              }
3508              catch (InterruptedException e)
3509              {
3510                debugException(e);
3511                // If this thread is interrupted, then someone wants us to stop
3512                // processing, so that's what we'll do.
3513                stopProcessing = true;
3514              }
3515    
3516              if ((unparsedRecord == null) || (unparsedRecord.isEOF()))
3517              {
3518                stopProcessing = true;
3519              }
3520            }
3521          }
3522          finally
3523          {
3524            try
3525            {
3526              asyncParser.shutdown();
3527            }
3528            catch (InterruptedException e)
3529            {
3530              debugException(e);
3531            }
3532            finally
3533            {
3534              asyncParsingComplete.set(true);
3535            }
3536          }
3537        }
3538      }
3539    
3540    
3541    
3542      /**
3543       * Used to parse Records asynchronously.
3544       */
3545      private final class RecordParser implements Processor<UnparsedLDIFRecord,
3546                                                            LDIFRecord>
3547      {
3548          /**
3549           * {@inheritDoc}
3550           */
3551          public LDIFRecord process(final UnparsedLDIFRecord input)
3552               throws LDIFException
3553          {
3554            LDIFRecord record = decodeRecord(input, relativeBasePath);
3555    
3556            if ((record instanceof Entry) && (entryTranslator != null))
3557            {
3558              record = entryTranslator.translate((Entry) record,
3559                                       input.getFirstLineNumber());
3560    
3561              if (record == null)
3562              {
3563                record = SKIP_ENTRY;
3564              }
3565            }
3566            return record;
3567          }
3568      }
3569    }