package org.crsh.cmdline.matcher.tokenizer;

import org.crsh.cmdline.Delimiter;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.NoSuchElementException;

/**
 * Lazily splits a character sequence into {@link Token}s.
 *
 * <p>Tokens are parsed on demand and kept in an internal buffer, so a consumer can look ahead
 * with {@link #peek()} and rewind with {@link #pushBack(int)} without re-parsing the input.
 * A run of whitespace characters yields a single {@code Token.Whitespace}; any other run yields
 * a {@code Token.Literal} whose concrete kind (word, short option, long option) is taken from
 * the status reached by the {@code State} the characters were pushed into.</p>
 *
 * @author <a href="mailto:julien.viet@exoplatform.com">Julien Viet</a>
 */
public class Tokenizer implements Iterator<Token> {

  /** The input being tokenized. */
  private final CharSequence s;

  /** Offset in {@link #s} of the first character that has not been parsed yet. */
  private int index;

  /** Every token parsed so far, in input order. */
  private final ArrayList<Token> stack;

  /** Position in {@link #stack} of the token the next call to {@link #next()} returns. */
  private int ptr;

  /** Delimiter taken from the escape state of the last parsed literal, {@code null} before any. */
  private Delimiter delimiter;

  /**
   * Creates a tokenizer positioned at the beginning of the given input.
   *
   * @param s the character sequence to tokenize
   */
  public Tokenizer(CharSequence s) {
    this.s = s;
    this.stack = new ArrayList<Token>();
    this.index = 0;
    this.delimiter = null;
  }

  /**
   * Tells whether a token is available, parsing one more token from the input when the
   * buffer has been fully consumed.
   *
   * @return {@code true} when {@link #next()} would return a token
   */
  public boolean hasNext() {
    if (ptr < stack.size()) {
      return true;
    }
    Token parsed = parse();
    if (parsed == null) {
      return false;
    }
    stack.add(parsed);
    return true;
  }

  /**
   * Parses the token starting at the current input offset and advances past it.
   *
   * @return the parsed token, or {@code null} when the input is exhausted
   */
  private Token parse() {
    if (index >= s.length()) {
      return null;
    }

    final int start = index;
    char current = s.charAt(index);

    // First try to consume a run of whitespace.
    while (Character.isWhitespace(current)) {
      index++;
      if (index >= s.length()) {
        break;
      }
      current = s.charAt(index);
    }
    if (index > start) {
      return new Token.Whitespace(start, rawText(start));
    }

    // Otherwise consume a literal: it ends at the first whitespace seen while no escape
    // is active, or at the end of the input.
    State state = new State();
    while (!Character.isWhitespace(current) || state.escape != Escape.NONE) {
      index++;
      state.push(current);
      if (index >= s.length()) {
        break;
      }
      current = s.charAt(index);
    }
    if (index <= start) {
      return null;
    }

    Token literal = createLiteral(start, state);
    // Remember the delimiter of the escape still in effect when the literal ended.
    delimiter = state.escape.delimiter;
    return literal;
  }

  /**
   * Returns the raw input text between the given offset and the current parse offset.
   *
   * @param start the offset of the first character of the text
   * @return the raw text as a string
   */
  private String rawText(int start) {
    return s.subSequence(start, index).toString();
  }

  /**
   * Builds the literal token matching the status reached by the parse state.
   *
   * @param start the offset of the first character of the literal
   * @param state the state every character of the literal was pushed into
   * @return the literal token
   * @throws AssertionError when the state status is not one of the handled values
   */
  private Token createLiteral(int start, State state) {
    switch (state.status) {
      case INIT:
      case WORD:
        return new Token.Literal.Word(start, rawText(start), state.buffer.toString());
      case SHORT_OPTION:
        return new Token.Literal.Option.Short(start, rawText(start), state.buffer.toString());
      case LONG_OPTION:
        return new Token.Literal.Option.Long(start, rawText(start), state.buffer.toString());
      default:
        throw new AssertionError(state.status);
    }
  }

  /**
   * Returns the next token and moves past it.
   *
   * @return the next token
   * @throws NoSuchElementException when no token is left
   */
  public Token next() {
    if (!hasNext()) {
      throw new NoSuchElementException();
    }
    return stack.get(ptr++);
  }

  /**
   * Not supported.
   *
   * @throws UnsupportedOperationException always
   */
  public void remove() {
    throw new UnsupportedOperationException();
  }

  /**
   * Returns the start offset of the upcoming token, or the current parse offset when no
   * token is left.
   *
   * @return the index value
   */
  public int getIndex() {
    Token upcoming = peek();
    return upcoming != null ? upcoming.getFrom() : index;
  }

  /**
   * Pushes back the last token returned, equivalent to {@code pushBack(1)}.
   *
   * @throws IllegalStateException when no token has been consumed
   */
  public void pushBack() {
    pushBack(1);
  }

  /**
   * Rewinds the tokenizer so that the last {@code count} consumed tokens are returned again.
   *
   * @param count the number of tokens to push back
   * @throws IllegalArgumentException when {@code count} is negative
   * @throws IllegalStateException when {@code count} exceeds the number of consumed tokens
   */
  public void pushBack(int count) {
    if (count < 0) {
      throw new IllegalArgumentException();
    }
    int target = ptr - count;
    if (target < 0) {
      throw new IllegalStateException("Trying to push back too many tokens");
    }
    ptr = target;
  }

  /**
   * Returns the upcoming token without consuming it.
   *
   * @return the upcoming token, or {@code null} when no token is left
   */
  public Token peek() {
    return hasNext() ? stack.get(ptr) : null;
  }

  /**
   * Returns the delimiter recorded when the most recent literal token was parsed.
   *
   * @return the delimiter, or {@code null} when no literal token has been parsed yet
   */
  public Delimiter getDelimiter() {
    return delimiter;
  }
}