001    package ca.uhn.hl7v2.preparser;
002    
003    import java.util.ArrayList;
004    import java.util.Iterator;
005    import java.util.List;
006    import java.util.Map;
007    import java.util.Properties;
008    import java.util.SortedMap;
009    import java.util.StringTokenizer;
010    import java.util.TreeMap;
011    
012    import ca.uhn.hl7v2.parser.EncodingCharacters;
013    
014    /*
015    The point of this class (all static members, not instantiable) is to take a
016    traditionally-encoded HL7 message and add all its contents to a Properties
017    object, via the parseMessage() method.
018    
019    The key-value pairs added to the Properties argument have keys that represent a
020    datum's location in the message.  (in the ZYX-1-2[0] style.  TODO: define
021    exactly.)  See Datum, particularly the toString() of that class.
022    Anyway, the Properties keys are those and the values are the tokens found.
023    
024    Note: we accept useless field repetition separators at the end of a 
025    field repetition sequence, e.g. |855-4545~555-3792~~~| , and interpret this
026    as defining repetitions 0 and 1.  This might not be allowed.  (HL7 2.3.1
027    section 2.10 explicitly allows this behaviour for fields / components /
028    subcomponents, but the allowance is notably absent for repetitions.  TODO:
029    nail down.)  We allow it anyway.
030    
031    Also, we accept things like |855-4545~~555-3792|, and interpret it as defining
032    repetitions 0 and 2.  The spec would seem to disallow this too, but there's no
033    harm.  :D  
034    */
035    public class ER7 {
036            
037            private ER7() {}
038    
039            /** characters that delimit segments.  for use with StringTokenizer.
040            We are forgiving: HL7 2.3.1 section 2.7 says that carriage return ('\r') is
041            the only segment delimiter.  TODO: check other versions. */ 
042            static final String segmentSeparators = "\r\n\f";
043    
044            /** Parses message and dumps contents to props, with keys in the 
045            ZYX[a]-b[c]-d-e style.
046            */
047            public static boolean parseMessage(/*out*/ Properties props, 
048                    /*in*/ List<DatumPath> msgMask, /*in*/ String message)
049            {
050                    boolean ok = false;
051                    if(message != null) {
052                            if(props == null)
053                                    props = new Properties();
054    
055                            StringTokenizer messageTokenizer 
056                                    = new StringTokenizer(message, segmentSeparators);
057                            if(messageTokenizer.hasMoreTokens()) {
058                                    String firstSegment = messageTokenizer.nextToken();
059                                    EncodingCharacters encodingChars = new EncodingCharacters('0', "0000");
060                                    if(parseMSHSegmentWhole(props, msgMask, encodingChars, firstSegment)) {
061                                            ok = true;
062                                            SortedMap<String, Integer> segmentId2nextRepIdx = new TreeMap<String, Integer>();
063                                            segmentId2nextRepIdx.put(new String("MSH"), 1); 
064                                                    // in case we find another MSH segment, heh.
065                                            while(messageTokenizer.hasMoreTokens()) {
066                                                    parseSegmentWhole(props, segmentId2nextRepIdx, 
067                                                            msgMask, encodingChars, messageTokenizer.nextToken());
068                                            }
069                                    }
070                            }
071                    }
072                    return ok;
073            }
074            
075            /** given segment, starting with "MSH", then encoding characters, etc...
076            put MSH[0]-1[0]-1-1 (== MSH-1) and MSH[0]-2[0]-1-1 (== MSH-2) into props, if found,
077            plus everything else found in 'segment' */
078            protected static boolean parseMSHSegmentWhole(/*out*/ Properties props, 
079                    /*in*/ List<DatumPath> msgMask, /*in*/ EncodingCharacters encodingChars, 
080                    /*in*/ String segment) 
081            {
082                    boolean ret = false;
083                    try {
084                            ER7SegmentHandler handler = new ER7SegmentHandler();
085                            handler.m_props = props;
086                            handler.m_encodingChars = encodingChars;
087                            handler.m_segmentId = "MSH";
088                            handler.m_segmentRepIdx = 0;
089                            if(msgMask != null)
090                                    handler.m_msgMask = msgMask;
091                            else {
092                                    handler.m_msgMask = new ArrayList<DatumPath>();
093                                    handler.m_msgMask.add(new DatumPath()); // everything will pass this
094                                            // (every DatumPath startsWith the zero-length DatumPath)
095                            }
096    
097                            encodingChars.setFieldSeparator(segment.charAt(3));
098                            List<Integer> nodeKey = new ArrayList<Integer>();
099                            nodeKey.add(new Integer(0));
100                            handler.putDatum(nodeKey, String.valueOf(encodingChars.getFieldSeparator()));
101                            encodingChars.setComponentSeparator(segment.charAt(4));
102                            encodingChars.setRepetitionSeparator(segment.charAt(5));
103                            encodingChars.setEscapeCharacter(segment.charAt(6));
104                            encodingChars.setSubcomponentSeparator(segment.charAt(7));
105                            nodeKey.set(0, new Integer(1));
106                            handler.putDatum(nodeKey, encodingChars.toString());
107    
108                            if(segment.charAt(8) == encodingChars.getFieldSeparator()) {    
109                                    ret = true; 
110                                    // now -- we recurse 
111                                    // through fields / field-repetitions / components / subcomponents.
112                                    nodeKey.clear();
113                                    nodeKey.add(new Integer(2));
114                                    parseSegmentGuts(handler, segment.substring(9), nodeKey);
115                            }
116                    }
117                    catch(IndexOutOfBoundsException e) {}
118                    catch(NullPointerException e) {}
119    
120                    return ret;
121            }
122    
123            /** pass in a whole segment (of type other than MSH), including message type
124            at the start, according to encodingChars, and we'll parse the contents and
125            put them in props. */
126            protected static void parseSegmentWhole(/*out*/ Properties props, 
127                    /*in/out*/ Map<String, Integer> segmentId2nextRepIdx, 
128                    /*in*/ List<DatumPath> msgMask, /*in*/ EncodingCharacters encodingChars, 
129                    /*in*/ String segment)
130            {
131                    try {
132                            String segmentId = segment.substring(0, 3);
133    
134                            int currentSegmentRepIdx = 0;
135                            if(segmentId2nextRepIdx.containsKey(segmentId))
136                                    currentSegmentRepIdx = ((Integer)segmentId2nextRepIdx.get(segmentId)).intValue();
137                            else
138                                    currentSegmentRepIdx = 0;
139                            segmentId2nextRepIdx.put(segmentId, new Integer(currentSegmentRepIdx+1));
140    
141                            // will only bother to parse this segment if any of it's contents will 
142                            // be dumped to props.
143                            boolean parseThisSegment = false;
144                            DatumPath segmentIdAsDatumPath = new DatumPath().add(segmentId);
145                            for(Iterator<DatumPath> maskIt = msgMask.iterator(); !parseThisSegment && maskIt.hasNext(); ) 
146                                    parseThisSegment = segmentIdAsDatumPath.startsWith(maskIt.next());
147                            for(Iterator<DatumPath> maskIt = msgMask.iterator(); !parseThisSegment && maskIt.hasNext(); ) 
148                                    parseThisSegment = maskIt.next().startsWith(segmentIdAsDatumPath);
149    
150                            if(parseThisSegment && (segment.charAt(3) == encodingChars.getFieldSeparator())) {
151                                    ER7SegmentHandler handler = new ER7SegmentHandler();
152                                    handler.m_props = props;
153                                    handler.m_encodingChars = encodingChars;
154                                    handler.m_segmentId = segmentId;
155                                    handler.m_msgMask = msgMask;
156                                    handler.m_segmentRepIdx = currentSegmentRepIdx;
157    
158                                    List<Integer> nodeKey = new ArrayList<Integer>();
159                                    nodeKey.add(new Integer(0));
160                                    parseSegmentGuts(handler, segment.substring(4), nodeKey);
161                            }
162                    }
163                    catch(NullPointerException e) {}
164                    catch(IndexOutOfBoundsException e) {}
165            }
166    
167            static protected interface Handler
168            {
169                    public int specDepth();
170                    public char delim(int level);
171    
172                    public void putDatum(List<Integer> nodeKey, String value);
173            }
174    
175            static protected class ER7SegmentHandler implements Handler
176            {
177                    Properties m_props;
178    
179                    EncodingCharacters m_encodingChars;
180    
181                    String m_segmentId;
182                    int m_segmentRepIdx;
183    
184                    List<DatumPath> m_msgMask;
185    
186                    public int specDepth() {return 4;}
187    
188                    public char delim(int level)
189                    {
190                            if(level == 0)
191                                    return m_encodingChars.getFieldSeparator();
192                            else if(level == 1)
193                                    return m_encodingChars.getRepetitionSeparator();
194                            else if(level == 2)
195                                    return m_encodingChars.getComponentSeparator();
196                            else if(level == 3)
197                                    return m_encodingChars.getSubcomponentSeparator();
198                            else
199                                    throw new java.lang.Error();
200                    }
201    
202                    public void putDatum(List<Integer> valNodeKey, String value)
203                    {
204                            // make a DatumPath from valNodeKey and info in this: 
205                            DatumPath valDatumPath = new DatumPath();
206                            valDatumPath.add(m_segmentId).add(m_segmentRepIdx);
207                            for(int i=0; i<valNodeKey.size(); ++i) {
208                                    // valNodeKey: everything counts from 0 -- not so with DatumPath ... sigh. 
209                                    int itval = ((Integer)valNodeKey.get(i)).intValue();
210                                    valDatumPath.add(new Integer(i == 1 ? itval : itval+1));
211                            }
212    
213                            // see if valDatumPath passes m_msgMask: 
214                            boolean valDatumPathPassesMask = false;
215                            for(Iterator<DatumPath> maskIt = m_msgMask.iterator(); 
216                                    !valDatumPathPassesMask && maskIt.hasNext(); )
217                            {
218                                    valDatumPathPassesMask = valDatumPath.startsWith(maskIt.next());
219                            }
220    
221                            if(valDatumPathPassesMask)
222                                    m_props.setProperty(valDatumPath.toString(), value);
223                    }
224            }
225    
226            /** recursively tokenize "guts" (a segment, or part of one) into tokens, 
227            according to separators (aka delimiters) which are different at each level
228            of recursion, and to a recursive depth which is discovered through "handler"
229            via handler.delim(int) and handler.specDepth()  As tokens are found, they
230            are reported to handler via handler.putDatum(), which presumably stashes them
231            away somewhere.  We tell the handler about the location in the message via
232            putDatum()'s key argument, which is a List of Integers representing the 
233            position in the parse tree (size() == depth of recursion).
234    
235            TODO: say more.
236            */
237            protected static void parseSegmentGuts(/*in/out*/ Handler handler,  
238                    /*in*/ String guts, /*in*/List<Integer> nodeKey)
239            {
240                    char thisDepthsDelim = handler.delim(nodeKey.size()-1);
241                    //nodeKey.add(new Integer(0)); // will change nodeKey back before function exits
242    
243                    StringTokenizer gutsTokenizer 
244                            = new StringTokenizer(guts, String.valueOf(thisDepthsDelim), true);
245                    while(gutsTokenizer.hasMoreTokens()) {
246                            String gutsToken = gutsTokenizer.nextToken();
247    
248                            if(gutsToken.charAt(0) == thisDepthsDelim) {
249                                    // gutsToken is all delims -- skipping over as many fields or
250                                    // components or whatevers as there are characters in the token: 
251                                    int oldvalue = ((Integer)nodeKey.get(nodeKey.size()-1)).intValue();
252                                    nodeKey.set(nodeKey.size()-1, new Integer(oldvalue + gutsToken.length()));
253                            }
254                            else {
255                                    if(nodeKey.size() < handler.specDepth()) {
256                                            nodeKey.add(new Integer(0));
257                                            parseSegmentGuts(handler, gutsToken, nodeKey);
258                                            nodeKey.remove(nodeKey.size()-1);
259                                    }
260                                    else 
261                                            handler.putDatum(nodeKey, gutsToken);
262                            }
263                    }
264                    //nodeKey.setSize(nodeKey.size()-1); // undoing add done at top of this func
265            }
266    
267            public static void main(String args[])
268            {
269                    if(args.length >= 1) {
270                            //String message = "MSH|^~\\&||||foo|foo|foo";
271                            System.out.println(args[0]);
272    
273                            Properties props = new Properties();
274    
275                            List<DatumPath> msgMask = new ArrayList<DatumPath>();
276                            msgMask.add(new DatumPath());
277    
278                            System.err.println("ER7.parseMessage returned " + parseMessage(props, msgMask, args[0]));
279                            props.list(System.out);
280                    }
281            }
282            
283    }
284