001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.camel.util;
018
019import java.io.Closeable;
020import java.io.File;
021import java.io.FileInputStream;
022import java.io.FileNotFoundException;
023import java.io.IOException;
024import java.io.InputStream;
025import java.io.InputStreamReader;
026import java.io.StringReader;
027import java.nio.CharBuffer;
028import java.nio.channels.Channels;
029import java.nio.channels.ReadableByteChannel;
030import java.nio.charset.Charset;
031import java.nio.charset.CharsetDecoder;
032import java.nio.charset.IllegalCharsetNameException;
033import java.nio.charset.UnsupportedCharsetException;
034import java.util.InputMismatchException;
035import java.util.Iterator;
036import java.util.LinkedHashMap;
037import java.util.Map;
038import java.util.Map.Entry;
039import java.util.NoSuchElementException;
040import java.util.Objects;
041import java.util.concurrent.locks.Lock;
042import java.util.concurrent.locks.ReentrantLock;
043import java.util.regex.Matcher;
044import java.util.regex.Pattern;
045
046import static org.apache.camel.util.BufferCaster.cast;
047
048public final class Scanner implements Iterator<String>, Closeable {
049
050    static {
051        WHITESPACE_PATTERN = Pattern.compile("\\s+");
052        FIND_ANY_PATTERN = Pattern.compile("(?s).*");
053    }
054
055    private static final Lock LOCK = new ReentrantLock();
056    private static final Map<String, Pattern> CACHE = new LinkedHashMap<>() {
057        @Override
058        protected boolean removeEldestEntry(Entry<String, Pattern> eldest) {
059            return size() >= 7;
060        }
061    };
062
063    private static final Pattern WHITESPACE_PATTERN;
064
065    private static final Pattern FIND_ANY_PATTERN;
066
067    private static final int BUFFER_SIZE = 1024;
068
069    private final Readable source;
070    private final Pattern delimPattern;
071    private final Matcher matcher;
072    private CharBuffer buf;
073    private int position;
074    private boolean inputExhausted;
075    private boolean needInput;
076    private boolean skipped;
077    private int savedPosition = -1;
078    private boolean closed;
079    private IOException lastIOException;
080
081    public Scanner(InputStream source, String charsetName, String pattern) {
082        this(new InputStreamReader(Objects.requireNonNull(source, "source"), toDecoder(charsetName)), cachePattern(pattern));
083    }
084
085    @SuppressWarnings("resource")
086    // The stream will be closed by the class lifecycle accordingly.
087    public Scanner(File source, String charsetName, String pattern) throws FileNotFoundException {
088        this(new FileInputStream(Objects.requireNonNull(source, "source")).getChannel(), charsetName, pattern);
089    }
090
091    public Scanner(String source, String pattern) {
092        this(new StringReader(Objects.requireNonNull(source, "source")), cachePattern(pattern));
093    }
094
095    public Scanner(String source, Pattern pattern) {
096        this(new StringReader(Objects.requireNonNull(source, "source")), pattern);
097    }
098
099    public Scanner(ReadableByteChannel source, String charsetName, String pattern) {
100        this(Channels.newReader(Objects.requireNonNull(source, "source"), toDecoder(charsetName), -1), cachePattern(pattern));
101    }
102
103    public Scanner(Readable source, String pattern) {
104        this(Objects.requireNonNull(source, "source"), cachePattern(pattern));
105    }
106
107    private Scanner(Readable source, Pattern pattern) {
108        this.source = source;
109        delimPattern = pattern != null ? pattern : WHITESPACE_PATTERN;
110        buf = CharBuffer.allocate(BUFFER_SIZE);
111        cast(buf).limit(0);
112        matcher = delimPattern.matcher(buf);
113        matcher.useTransparentBounds(true);
114        matcher.useAnchoringBounds(false);
115    }
116
117    private static CharsetDecoder toDecoder(String charsetName) {
118        try {
119            Charset cs = charsetName != null ? Charset.forName(charsetName) : Charset.defaultCharset();
120            return cs.newDecoder();
121        } catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
122            throw new IllegalArgumentException(e);
123        }
124    }
125
126    @Override
127    public boolean hasNext() {
128        if (closed) {
129            return false;
130        }
131        saveState();
132        while (!inputExhausted) {
133            if (hasTokenInBuffer()) {
134                revertState();
135                return true;
136            }
137            readMore();
138        }
139        boolean result = hasTokenInBuffer();
140        revertState();
141        return result;
142    }
143
144    @Override
145    public String next() {
146        checkClosed();
147        while (true) {
148            String token = getCompleteTokenInBuffer();
149            if (token != null) {
150                skipped = false;
151                return token;
152            }
153            if (needInput) {
154                readMore();
155            } else {
156                throwFor();
157            }
158        }
159    }
160
161    public String getDelim() {
162        return delimPattern.pattern();
163    }
164
165    private void saveState() {
166        savedPosition = position;
167    }
168
169    private void revertState() {
170        position = savedPosition;
171        savedPosition = -1;
172        skipped = false;
173    }
174
175    private void readMore() {
176        if (buf.limit() == buf.capacity()) {
177            expandBuffer();
178        }
179        int p = buf.position();
180        cast(buf).position(buf.limit());
181        cast(buf).limit(buf.capacity());
182        int n;
183        try {
184            n = source.read(buf);
185        } catch (IOException ioe) {
186            lastIOException = ioe;
187            n = -1;
188        }
189        if (n == -1) {
190            inputExhausted = true;
191            needInput = false;
192        } else if (n > 0) {
193            needInput = false;
194        }
195        cast(buf).limit(buf.position());
196        cast(buf).position(p);
197    }
198
199    private void expandBuffer() {
200        int offset = savedPosition == -1 ? position : savedPosition;
201        cast(buf).position(offset);
202        if (offset > 0) {
203            buf.compact();
204            translateSavedIndexes(offset);
205            position -= offset;
206            cast(buf).flip();
207        } else {
208            int newSize = buf.capacity() * 2;
209            CharBuffer newBuf = CharBuffer.allocate(newSize);
210            newBuf.put(buf);
211            cast(newBuf).flip();
212            translateSavedIndexes(offset);
213            position -= offset;
214            buf = newBuf;
215            matcher.reset(buf);
216        }
217    }
218
219    private void translateSavedIndexes(int offset) {
220        if (savedPosition != -1) {
221            savedPosition -= offset;
222        }
223    }
224
225    private void throwFor() {
226        skipped = false;
227        if (inputExhausted && position == buf.limit()) {
228            throw new NoSuchElementException();
229        } else {
230            throw new InputMismatchException();
231        }
232    }
233
234    private boolean hasTokenInBuffer() {
235        matcher.usePattern(delimPattern);
236        matcher.region(position, buf.limit());
237        if (matcher.lookingAt()) {
238            position = matcher.end();
239        }
240        return position != buf.limit();
241    }
242
243    private String getCompleteTokenInBuffer() {
244        matcher.usePattern(delimPattern);
245        if (!skipped) {
246            matcher.region(position, buf.limit());
247            if (matcher.lookingAt()) {
248                if (matcher.hitEnd() && !inputExhausted) {
249                    needInput = true;
250                    return null;
251                }
252                skipped = true;
253                position = matcher.end();
254            }
255        }
256        if (position == buf.limit()) {
257            if (inputExhausted) {
258                return null;
259            }
260            needInput = true;
261            return null;
262        }
263        matcher.region(position, buf.limit());
264        boolean foundNextDelim = matcher.find();
265        if (foundNextDelim && matcher.end() == position) {
266            foundNextDelim = matcher.find();
267        }
268        if (foundNextDelim) {
269            if (matcher.requireEnd() && !inputExhausted) {
270                needInput = true;
271                return null;
272            }
273            int tokenEnd = matcher.start();
274            matcher.usePattern(FIND_ANY_PATTERN);
275            matcher.region(position, tokenEnd);
276            if (matcher.matches()) {
277                String s = matcher.group();
278                position = matcher.end();
279                return s;
280            } else {
281                return null;
282            }
283        }
284        if (inputExhausted) {
285            matcher.usePattern(FIND_ANY_PATTERN);
286            matcher.region(position, buf.limit());
287            if (matcher.matches()) {
288                String s = matcher.group();
289                position = matcher.end();
290                return s;
291            }
292            return null;
293        }
294        needInput = true;
295        return null;
296    }
297
298    private void checkClosed() {
299        if (closed) {
300            throw new IllegalStateException();
301        }
302    }
303
304    @Override
305    public void close() throws IOException {
306        if (!closed) {
307            closed = true;
308            if (source instanceof Closeable closeable) {
309                try {
310                    closeable.close();
311                } catch (IOException e) {
312                    lastIOException = e;
313                }
314            }
315        }
316        if (lastIOException != null) {
317            throw lastIOException;
318        }
319    }
320
321    private static Pattern cachePattern(String pattern) {
322        if (pattern == null) {
323            return null;
324        }
325        LOCK.lock();
326        try {
327            return CACHE.computeIfAbsent(pattern, Pattern::compile);
328        } finally {
329            LOCK.unlock();
330        }
331    }
332
333}