/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.camel.util;

import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.nio.CharBuffer;
import java.nio.channels.Channels;
import java.nio.channels.ReadableByteChannel;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;
import java.util.InputMismatchException;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static org.apache.camel.util.BufferCaster.cast;

/**
 * A token scanner that splits character input into {@link String} tokens separated by a delimiter regular
 * expression, exposing the tokens through the {@link Iterator} contract.
 * <p>
 * Input is pulled from a {@link Readable} into an internal {@link CharBuffer} that is compacted or doubled in size
 * when it fills up. An {@link IOException} raised while reading is recorded, treated as end of input, and rethrown
 * from {@link #close()}.
 */
public final class Scanner implements Iterator<String>, Closeable {

    /** Guards every access to {@link #CACHE}. */
    private static final Lock LOCK = new ReentrantLock();

    /**
     * Small cache of compiled delimiter patterns keyed by their regular expression, in insertion order.
     */
    private static final Map<String, Pattern> CACHE = new LinkedHashMap<>() {
        @Override
        protected boolean removeEldestEntry(Entry<String, Pattern> eldest) {
            // Consulted after an insertion: once the map has reached 7 entries the eldest one is dropped,
            // so at most 6 compiled patterns are retained.
            return size() >= 7;
        }
    };

    /** Delimiter used when no explicit pattern is supplied. */
    private static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");

    /** Matches any character sequence, including line terminators; used to extract a token from a region. */
    private static final Pattern FIND_ANY_PATTERN = Pattern.compile("(?s).*");

    /** Initial capacity, in chars, of the internal buffer. */
    private static final int BUFFER_SIZE = 1024;

    private final Readable source;
    private final Pattern delimPattern;
    private final Matcher matcher;
    /** Unconsumed input; readable content is {@code [0, limit)}. */
    private CharBuffer buf;
    /** Index in {@link #buf} of the next character to scan. */
    private int position;
    /** Set once the source reported end of input or failed with an {@link IOException}. */
    private boolean inputExhausted;
    /** Set when a token cannot be completed without reading more input. */
    private boolean needInput;
    /** Set when the delimiters preceding the current token have already been consumed. */
    private boolean skipped;
    /** Position remembered by {@link #saveState()}, or {@code -1} when no state is saved. */
    private int savedPosition = -1;
    private boolean closed;
    /** Last {@link IOException} seen while reading or closing the source; rethrown by {@link #close()}. */
    private IOException lastIOException;

    /**
     * Creates a scanner that decodes the given stream.
     *
     * @param  source                   the stream to read from
     * @param  charsetName              the charset used to decode the stream, or {@code null} for the default
     *                                  charset
     * @param  pattern                  the delimiter regular expression, or {@code null} to split on whitespace
     * @throws NullPointerException     if {@code source} is {@code null}
     * @throws IllegalArgumentException if the charset name is illegal or unsupported
     */
    public Scanner(InputStream source, String charsetName, String pattern) {
        this(new InputStreamReader(Objects.requireNonNull(source, "source"), toDecoder(charsetName)),
             cachePattern(pattern));
    }

    /**
     * Creates a scanner that decodes the content of the given file.
     *
     * @param  source                   the file to read from
     * @param  charsetName              the charset used to decode the file, or {@code null} for the default
     *                                  charset
     * @param  pattern                  the delimiter regular expression, or {@code null} to split on whitespace
     * @throws FileNotFoundException    if the file cannot be opened for reading
     * @throws NullPointerException     if {@code source} is {@code null}
     * @throws IllegalArgumentException if the charset name is illegal or unsupported
     */
    @SuppressWarnings("resource")
    // The stream will be closed by the class lifecycle accordingly.
    public Scanner(File source, String charsetName, String pattern) throws FileNotFoundException {
        this(new FileInputStream(Objects.requireNonNull(source, "source")).getChannel(), charsetName, pattern);
    }

    /**
     * Creates a scanner over the given text.
     *
     * @param  source               the text to scan
     * @param  pattern              the delimiter regular expression, or {@code null} to split on whitespace
     * @throws NullPointerException if {@code source} is {@code null}
     */
    public Scanner(String source, String pattern) {
        this(new StringReader(Objects.requireNonNull(source, "source")), cachePattern(pattern));
    }

    /**
     * Creates a scanner over the given text using an already compiled delimiter.
     *
     * @param  source               the text to scan
     * @param  pattern              the delimiter pattern, or {@code null} to split on whitespace
     * @throws NullPointerException if {@code source} is {@code null}
     */
    public Scanner(String source, Pattern pattern) {
        this(new StringReader(Objects.requireNonNull(source, "source")), pattern);
    }

    /**
     * Creates a scanner that decodes the given channel.
     *
     * @param  source                   the channel to read from
     * @param  charsetName              the charset used to decode the channel, or {@code null} for the default
     *                                  charset
     * @param  pattern                  the delimiter regular expression, or {@code null} to split on whitespace
     * @throws NullPointerException     if {@code source} is {@code null}
     * @throws IllegalArgumentException if the charset name is illegal or unsupported
     */
    public Scanner(ReadableByteChannel source, String charsetName, String pattern) {
        this(Channels.newReader(Objects.requireNonNull(source, "source"), toDecoder(charsetName), -1),
             cachePattern(pattern));
    }

    /**
     * Creates a scanner over an arbitrary character source.
     *
     * @param  source               the character source to read from
     * @param  pattern              the delimiter regular expression, or {@code null} to split on whitespace
     * @throws NullPointerException if {@code source} is {@code null}
     */
    public Scanner(Readable source, String pattern) {
        this(Objects.requireNonNull(source, "source"), cachePattern(pattern));
    }

    /**
     * Common initialisation: starts with an empty buffer and a matcher bound to it.
     *
     * @param source  the character source to read from
     * @param pattern the delimiter pattern, or {@code null} to split on whitespace
     */
    private Scanner(Readable source, Pattern pattern) {
        this.source = source;
        delimPattern = pattern != null ? pattern : WHITESPACE_PATTERN;
        buf = CharBuffer.allocate(BUFFER_SIZE);
        // Nothing has been read yet, so the readable part of the buffer is empty.
        cast(buf).limit(0);
        matcher = delimPattern.matcher(buf);
        matcher.useTransparentBounds(true);
        matcher.useAnchoringBounds(false);
    }

    /**
     * Creates a decoder for the named charset.
     *
     * @param  charsetName              the charset name, or {@code null} for the default charset
     * @return                          a new decoder for the resolved charset
     * @throws IllegalArgumentException if the charset name is illegal or unsupported
     */
    private static CharsetDecoder toDecoder(String charsetName) {
        try {
            Charset cs = charsetName != null ? Charset.forName(charsetName) : Charset.defaultCharset();
            return cs.newDecoder();
        } catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
            throw new IllegalArgumentException(e);
        }
    }

    /**
     * Reports whether another token is available, reading more input as needed. The scan position is restored
     * before returning.
     *
     * @return {@code true} if a token is available; {@code false} if the scanner is closed or the input holds no
     *         further token
     */
    @Override
    public boolean hasNext() {
        if (closed) {
            return false;
        }
        saveState();
        while (!inputExhausted) {
            if (hasTokenInBuffer()) {
                revertState();
                return true;
            }
            readMore();
        }
        // The source is drained; whatever is left in the buffer decides the answer.
        boolean result = hasTokenInBuffer();
        revertState();
        return result;
    }

    /**
     * Returns the next token, reading more input as needed.
     *
     * @return                        the next token
     * @throws IllegalStateException  if the scanner has been closed
     * @throws NoSuchElementException if the input is exhausted and the buffer has been fully consumed
     * @throws InputMismatchException if no token could be produced in any other situation
     */
    @Override
    public String next() {
        checkClosed();
        while (true) {
            String token = getCompleteTokenInBuffer();
            if (token != null) {
                skipped = false;
                return token;
            }
            if (needInput) {
                readMore();
            } else {
                throwFor();
            }
        }
    }

    /**
     * Returns the delimiter in use.
     *
     * @return the regular expression of the delimiter pattern
     */
    public String getDelim() {
        return delimPattern.pattern();
    }

    /** Remembers the current scan position so that {@link #revertState()} can restore it. */
    private void saveState() {
        savedPosition = position;
    }

    /** Restores the scan position remembered by {@link #saveState()} and clears the saved state. */
    private void revertState() {
        position = savedPosition;
        savedPosition = -1;
        skipped = false;
    }

    /**
     * Appends more characters from the source to the buffer, making room first if the buffer is full. End of input
     * and read failures both mark the input as exhausted; a failure is additionally recorded in
     * {@link #lastIOException}.
     */
    private void readMore() {
        if (buf.limit() == buf.capacity()) {
            expandBuffer();
        }
        // Expose the free tail [limit, capacity) to the source, remembering the current buffer position.
        int p = buf.position();
        cast(buf).position(buf.limit());
        cast(buf).limit(buf.capacity());
        int n;
        try {
            n = source.read(buf);
        } catch (IOException ioe) {
            lastIOException = ioe;
            n = -1;
        }
        if (n == -1) {
            inputExhausted = true;
            needInput = false;
        } else if (n > 0) {
            needInput = false;
        }
        // Back to read mode: the limit now covers the newly read characters.
        cast(buf).limit(buf.position());
        cast(buf).position(p);
    }

    /**
     * Makes room in a full buffer: already consumed characters are discarded by compaction when there are any,
     * otherwise the buffer is replaced by one of twice the capacity.
     */
    private void expandBuffer() {
        // Characters before this index are no longer needed; a saved position must stay addressable.
        int offset = savedPosition == -1 ? position : savedPosition;
        cast(buf).position(offset);
        if (offset > 0) {
            buf.compact();
            translateSavedIndexes(offset);
            position -= offset;
            cast(buf).flip();
        } else {
            // Nothing can be discarded (offset is 0 here, so no index needs translating): grow instead.
            int newSize = buf.capacity() * 2;
            CharBuffer newBuf = CharBuffer.allocate(newSize);
            newBuf.put(buf);
            cast(newBuf).flip();
            buf = newBuf;
            matcher.reset(buf);
        }
    }

    /**
     * Shifts the saved position, if any, after the buffer content moved towards the start.
     *
     * @param offset the number of characters removed from the front of the buffer
     */
    private void translateSavedIndexes(int offset) {
        if (savedPosition != -1) {
            savedPosition -= offset;
        }
    }

    /**
     * Raises the exception for a token that could not be produced.
     *
     * @throws NoSuchElementException if the input is exhausted and the buffer has been fully consumed
     * @throws InputMismatchException otherwise
     */
    private void throwFor() {
        skipped = false;
        if (inputExhausted && position == buf.limit()) {
            throw new NoSuchElementException();
        } else {
            throw new InputMismatchException();
        }
    }

    /**
     * Skips a delimiter at the current position, if there is one, and reports whether any characters remain in the
     * buffer afterwards. The position is advanced as a side effect; {@link #hasNext()} reverts it.
     *
     * @return {@code true} if the buffer still holds characters after the skipped delimiter
     */
    private boolean hasTokenInBuffer() {
        matcher.usePattern(delimPattern);
        matcher.region(position, buf.limit());
        if (matcher.lookingAt()) {
            position = matcher.end();
        }
        return position != buf.limit();
    }

    /**
     * Tries to extract the next token from the characters currently buffered.
     *
     * @return the token, or {@code null} if none can be produced yet; in that case {@link #needInput} tells whether
     *         reading more input may help
     */
    private String getCompleteTokenInBuffer() {
        matcher.usePattern(delimPattern);
        if (!skipped) {
            // Consume the delimiter in front of the token.
            matcher.region(position, buf.limit());
            if (matcher.lookingAt()) {
                if (matcher.hitEnd() && !inputExhausted) {
                    // The delimiter reaches the end of the buffer and might continue in unread input.
                    needInput = true;
                    return null;
                }
                skipped = true;
                position = matcher.end();
            }
        }
        if (position == buf.limit()) {
            if (inputExhausted) {
                return null;
            }
            needInput = true;
            return null;
        }
        // Look for the delimiter that terminates the token.
        matcher.region(position, buf.limit());
        boolean foundNextDelim = matcher.find();
        if (foundNextDelim && matcher.end() == position) {
            // A match ending right at the start would yield no token; search for the next one.
            foundNextDelim = matcher.find();
        }
        if (foundNextDelim) {
            if (matcher.requireEnd() && !inputExhausted) {
                // More input could invalidate this delimiter match.
                needInput = true;
                return null;
            }
            int tokenEnd = matcher.start();
            matcher.usePattern(FIND_ANY_PATTERN);
            matcher.region(position, tokenEnd);
            if (matcher.matches()) {
                String s = matcher.group();
                position = matcher.end();
                return s;
            } else {
                return null;
            }
        }
        if (inputExhausted) {
            // No further delimiter: the rest of the buffer is the last token.
            matcher.usePattern(FIND_ANY_PATTERN);
            matcher.region(position, buf.limit());
            if (matcher.matches()) {
                String s = matcher.group();
                position = matcher.end();
                return s;
            }
            return null;
        }
        needInput = true;
        return null;
    }

    /**
     * Verifies that the scanner is still open.
     *
     * @throws IllegalStateException if the scanner has been closed
     */
    private void checkClosed() {
        if (closed) {
            throw new IllegalStateException("Scanner closed");
        }
    }

    /**
     * Closes the scanner and, if it is {@link Closeable}, the underlying source. The source is closed only on the
     * first call.
     *
     * @throws IOException the last I/O failure recorded while reading from or closing the source, if any; it is
     *                     rethrown on every call
     */
    @Override
    public void close() throws IOException {
        if (!closed) {
            closed = true;
            if (source instanceof Closeable closeable) {
                try {
                    closeable.close();
                } catch (IOException e) {
                    lastIOException = e;
                }
            }
        }
        if (lastIOException != null) {
            throw lastIOException;
        }
    }

    /**
     * Returns the compiled form of the given regular expression, compiling and caching it on first use.
     *
     * @param  pattern the regular expression, may be {@code null}
     * @return         the compiled pattern, or {@code null} if {@code pattern} is {@code null}
     */
    private static Pattern cachePattern(String pattern) {
        if (pattern == null) {
            return null;
        }
        LOCK.lock();
        try {
            return CACHE.computeIfAbsent(pattern, Pattern::compile);
        } finally {
            LOCK.unlock();
        }
    }

}