001    /**
002     * The contents of this file are subject to the Mozilla Public License Version 1.1
003     * (the "License"); you may not use this file except in compliance with the License.
004     * You may obtain a copy of the License at http://www.mozilla.org/MPL/
005     * Software distributed under the License is distributed on an "AS IS" basis,
006     * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the
007     * specific language governing rights and limitations under the License.
008     *
009     * The Original Code is "XMLParser.java".  Description:
010     * "Parses and encodes HL7 messages in XML form, according to HL7's normative XML encoding
011     * specification."
012     *
013     * The Initial Developer of the Original Code is University Health Network. Copyright (C)
014     * 2002.  All Rights Reserved.
015     *
016     * Contributor(s): ______________________________________.
017     *
018     * Alternatively, the contents of this file may be used under the terms of the
019     * GNU General Public License (the "GPL"), in which case the provisions of the GPL are
020     * applicable instead of those above.  If you wish to allow use of your version of this
021     * file only under the terms of the GPL and not to allow others to use your version
022     * of this file under the MPL, indicate your decision by deleting  the provisions above
023     * and replace  them with the notice and other provisions required by the GPL License.
024     * If you do not delete the provisions above, a recipient may use your version of
025     * this file under either the MPL or the GPL.
026     */
027    
028    package ca.uhn.hl7v2.parser;
029    
030    import java.io.File;
031    import java.io.FileReader;
032    import java.io.IOException;
033    import java.io.StringReader;
034    import java.io.StringWriter;
035    import java.util.HashSet;
036    import java.util.Set;
037    
038    import javax.xml.parsers.DocumentBuilder;
039    import javax.xml.parsers.DocumentBuilderFactory;
040    
041    import org.apache.xerces.parsers.DOMParser;
042    import org.apache.xerces.parsers.StandardParserConfiguration;
043    import org.apache.xml.serialize.OutputFormat;
044    import org.apache.xml.serialize.XMLSerializer;
045    import org.slf4j.Logger;
046    import org.slf4j.LoggerFactory;
047    import org.w3c.dom.DOMException;
048    import org.w3c.dom.Document;
049    import org.w3c.dom.Element;
050    import org.w3c.dom.Node;
051    import org.w3c.dom.NodeList;
052    import org.w3c.dom.Text;
053    import org.xml.sax.InputSource;
054    import org.xml.sax.SAXException;
055    
056    import ca.uhn.hl7v2.HL7Exception;
057    import ca.uhn.hl7v2.model.Composite;
058    import ca.uhn.hl7v2.model.DataTypeException;
059    import ca.uhn.hl7v2.model.GenericComposite;
060    import ca.uhn.hl7v2.model.GenericMessage;
061    import ca.uhn.hl7v2.model.GenericPrimitive;
062    import ca.uhn.hl7v2.model.Message;
063    import ca.uhn.hl7v2.model.Primitive;
064    import ca.uhn.hl7v2.model.Segment;
065    import ca.uhn.hl7v2.model.Structure;
066    import ca.uhn.hl7v2.model.Type;
067    import ca.uhn.hl7v2.model.Varies;
068    import ca.uhn.hl7v2.util.Terser;
069    
070    /**
071     * Parses and encodes HL7 messages in XML form, according to HL7's normative XML encoding
072     * specification.  This is an abstract class that handles datatype and segment parsing/encoding,
073     * but not the parsing/encoding of entire messages.  To use the XML parser, you should create a
074     * subclass for a certain message structure.  This subclass must be able to identify the Segment
075     * objects that correspond to various Segment nodes in an XML document, and call the methods <code>
076     * parse(Segment segment, ElementNode segmentNode)</code> and <code>encode(Segment segment, ElementNode segmentNode)
077     * </code> as appropriate.  XMLParser uses the Xerces parser, which must be installed in your classpath.
078     * @author Bryan Tripp, Shawn Bellina
079     */
080    public abstract class XMLParser extends Parser {
081    
    /** Class-wide SLF4J logger. */
    private static final Logger log = LoggerFactory.getLogger(XMLParser.class);

    /**
     * Xerces DOM parser, created in the constructor and used by
     * <code>parseStringIntoDocument(String)</code> to turn message strings into DOM documents.
     */
    private DOMParser parser;

    /**
     * Character encoding applied to the serializer's output format in
     * <code>doEncode(Message)</code>; when <code>null</code> no encoding is set explicitly.
     */
    private String textEncoding;

    /**
     * The nodes whose names match these strings will be kept as original, 
     * meaning that no white space trimming will occur on them
     */
    private String[] keepAsOriginalNodes;

    /**
     * All keepAsOriginalNodes names, concatenated by a pipe (|)
     */
    private String concatKeepAsOriginalNodes = "";
097    
098        /** Constructor */
099        public XMLParser() {
100            this(null);
101        }
102    
103        /** 
104         * Constructor
105         *  
106         * @param theFactory custom factory to use for model class lookup 
107         */
108        public XMLParser(ModelClassFactory theFactory) {
109            super(theFactory);
110            parser = new DOMParser(new StandardParserConfiguration());
111            try {
112                parser.setFeature("http://apache.org/xml/features/dom/include-ignorable-whitespace", false);
113            }
114            catch (Exception e) {
115                log.error("Can't exclude whitespace from XML DOM", e);
116            }
117        }
118        
119        /**
120         * Returns a String representing the encoding of the given message, if
121         * the encoding is recognized.  For example if the given message appears
122         * to be encoded using HL7 2.x XML rules then "XML" would be returned.
123         * If the encoding is not recognized then null is returned.  That this
124         * method returns a specific encoding does not guarantee that the
125         * message is correctly encoded (e.g. well formed XML) - just that
126         * it is not encoded using any other encoding than the one returned.
127         * Returns null if the encoding is not recognized.
128         */
129        public String getEncoding(String message) {
130            if (EncodingDetector.isXmlEncoded(message)) {
131                return "XML";
132            }
133            return null;
134        }
135    
136        
137        /**
138         * Returns true if and only if the given encoding is supported
139         * by this Parser.
140         */
141        public boolean supportsEncoding(String encoding) {
142            if (encoding.equals("XML")) {
143                return true;
144            }
145            else {
146                return false;
147            }
148        }
149    
150        /**
151         * @return the preferred encoding of this Parser
152         */
153        public String getDefaultEncoding() {
154            return "XML";
155        }
156        
157        /**
158         * Sets the <i>keepAsOriginalNodes<i>
159         * 
160         * The nodes whose names match the <i>keepAsOriginalNodes<i> will be kept as original, 
161         * meaning that no white space treaming will occur on them
162         */
163        public void setKeepAsOriginalNodes(String[] keepAsOriginalNodes) {
164            this.keepAsOriginalNodes = keepAsOriginalNodes;
165    
166            if (keepAsOriginalNodes.length != 0) {
167                //initializes the         
168                StringBuffer strBuf = new StringBuffer(keepAsOriginalNodes[0]);
169                for (int i = 1; i < keepAsOriginalNodes.length; i++) {
170                    strBuf.append("|");
171                    strBuf.append(keepAsOriginalNodes[i]);
172                }
173                concatKeepAsOriginalNodes = strBuf.toString();
174            }
175            else {
176                concatKeepAsOriginalNodes = "";
177            }
178        }
179    
180        /**
181         * Sets the <i>keepAsOriginalNodes<i>
182         */
183        public String[] getKeepAsOriginalNodes() {
184            return keepAsOriginalNodes;
185        }
186    
    /**
     * <p>Creates and populates a Message object from an XML Document that contains an XML-encoded HL7 message.</p>
     * <p>The easiest way to implement this method for a particular message structure is as follows:
     * <ol><li>Create an instance of the Message type you are going to handle with your subclass
     * of XMLParser</li>
     * <li>Go through the given Document and find the Elements that represent the top level of
     * each message segment. </li>
     * <li>For each of these segments, call <code>parse(Segment segmentObject, Element segmentElement)</code>,
     * providing the appropriate Segment from your Message object, and the corresponding Element.</li></ol>
     * At the end of this process, your Message object should be populated with data from the XML
     * Document.</p>
     * <p>This method is called by <code>doParse(String, String)</code> with the Document
     * produced by <code>parseStringIntoDocument(String)</code>.</p>
     * @param XMLMessage the DOM Document holding the XML-encoded message
     * @param version the version string handed through unchanged from <code>doParse</code>
     * @return the populated Message
     * @throws HL7Exception if the message is not correctly formatted.
     * @throws EncodingNotSupportedException if the message encoded
     *      is not supported by this parser.
     */
    public abstract Message parseDocument(Document XMLMessage, String version) throws HL7Exception;
203    
204        /**
205         * <p>Parses a message string and returns the corresponding Message
206         * object.  This method checks that the given message string is XML encoded, creates an
207         * XML Document object (using Xerces) from the given String, and calls the abstract
208         * method <code>parse(Document XMLMessage)</code></p>
209         */
210        protected Message doParse(String message, String version) throws HL7Exception, EncodingNotSupportedException {
211            Message m = null;
212    
213            //parse message string into a DOM document 
214            Document doc = null;
215            doc = parseStringIntoDocument(message);
216            m = parseDocument(doc, version);
217    
218            return m;
219        }
220    
221        /**
222         * Parses a string containing an XML document into a Document object.
223         * 
224         * Note that this method is synchronized currently, as the XML parser is
225         * not thread safe
226         * @throws HL7Exception 
227         */
228            protected synchronized Document parseStringIntoDocument(String message) throws HL7Exception {
229                    Document doc;
230                    try {
231                            parser.parse(new InputSource(new StringReader(message)));
232            }
233            catch (SAXException e) {
234                throw new HL7Exception("SAXException parsing XML", HL7Exception.APPLICATION_INTERNAL_ERROR, e);
235            }
236            catch (IOException e) {
237                throw new HL7Exception("IOException parsing XML", HL7Exception.APPLICATION_INTERNAL_ERROR, e);
238            }
239                    
240                    doc = parser.getDocument();
241                    return doc;
242            }
243    
244        /**
245         * Formats a Message object into an HL7 message string using the given
246         * encoding.
247         * @throws HL7Exception if the data fields in the message do not permit encoding
248         *      (e.g. required fields are null)
249         * @throws EncodingNotSupportedException if the requested encoding is not
250         *      supported by this parser.
251         */
252        protected String doEncode(Message source, String encoding) throws HL7Exception, EncodingNotSupportedException {
253            if (!encoding.equals("XML"))
254                throw new EncodingNotSupportedException("XMLParser supports only XML encoding");
255            return encode(source);
256        }
257    
258        /**
259         * Formats a Message object into an HL7 message string using this parser's
260         * default encoding (XML encoding). This method calls the abstract method
261         * <code>encodeDocument(...)</code> in order to obtain XML Document object
262         * representation of the Message, then serializes it to a String.
263         * @throws HL7Exception if the data fields in the message do not permit encoding
264         *      (e.g. required fields are null)
265         */
266        protected String doEncode(Message source) throws HL7Exception {
267            if (source instanceof GenericMessage) {
268                throw new HL7Exception("Can't XML-encode a GenericMessage.  Message must have a recognized structure.");
269            }
270            
271            Document doc = encodeDocument(source);
272            Element documentElement = doc.getDocumentElement();
273                    documentElement.setAttribute("xmlns", "urn:hl7-org:v2xml");
274            
275            StringWriter out = new StringWriter();
276    
277            OutputFormat outputFormat = new OutputFormat("", null, true);
278            outputFormat.setLineWidth(0);
279            
280            if (textEncoding != null) {
281                    outputFormat.setEncoding(textEncoding);
282            }
283            
284            XMLSerializer ser = new XMLSerializer(out, outputFormat); //default output format
285            try {
286                ser.serialize(doc);
287            }
288            catch (IOException e) {
289                throw new HL7Exception(
290                    "IOException serializing XML document to string",
291                    HL7Exception.APPLICATION_INTERNAL_ERROR,
292                    e);
293            }
294            return out.toString();
295        }
296    
    /**
     * <p>Creates an XML Document that corresponds to the given Message object. </p>
     * <p>If you are implementing this method, you should create an XML Document, and insert XML Elements
     * into it that correspond to the groups and segments that belong to the message type that your subclass
     * of XMLParser supports.  Then, for each segment in the message, call the method
     * <code>encode(Segment segmentObject, Element segmentElement)</code> using the Element for
     * that segment and the corresponding Segment object from the given Message.</p>
     * <p><code>doEncode(Message)</code> adds the <code>xmlns</code> attribute to the returned
     * Document's document element, so the Document must have one.</p>
     * @param source the message to represent as XML
     * @return the XML Document for <code>source</code>
     * @throws HL7Exception declared for implementations that cannot encode the message
     */
    public abstract Document encodeDocument(Message source) throws HL7Exception;
306    
307        /** 
308         * Populates the given Segment object with data from the given XML Element.
309         * @throws HL7Exception if the XML Element does not have the correct name and structure
310         *      for the given Segment, or if there is an error while setting individual field values.
311         */
312        public void parse(Segment segmentObject, Element segmentElement) throws HL7Exception {
313            Set<String> done = new HashSet<String>();
314            
315    //        for (int i = 1; i <= segmentObject.numFields(); i++) {
316    //            String elementName = makeElementName(segmentObject, i);
317    //            done.add(elementName);
318    //            parseReps(segmentObject, segmentElement, elementName, i);
319    //        }
320            
321            NodeList all = segmentElement.getChildNodes();
322            for (int i = 0; i < all.getLength(); i++) {
323                String elementName = all.item(i).getNodeName();
324                if (all.item(i).getNodeType() == Node.ELEMENT_NODE && !done.contains(elementName)) {
325                    done.add(elementName);
326                    
327                    int index = elementName.indexOf('.');
328                    if (index >= 0 && elementName.length() > index) { //properly formatted element
329                        String fieldNumString = elementName.substring(index + 1);
330                        int fieldNum = Integer.parseInt(fieldNumString);
331                        parseReps(segmentObject, segmentElement, elementName, fieldNum);                        
332                    } else {                        
333                        log.debug("Child of segment {} doesn't look like a field {}",
334                                    segmentObject.getName(), elementName);
335                    }
336                }
337            }
338    
339            //set data type of OBX-5        
340            if (segmentObject.getClass().getName().indexOf("OBX") >= 0) {
341                Varies.fixOBX5(segmentObject, getFactory());
342            }
343        }
344        
345        private void parseReps(Segment segmentObject, Element segmentElement, String fieldName, int fieldNum) 
346                 throws DataTypeException, HL7Exception {
347            
348            NodeList reps = segmentElement.getElementsByTagName(fieldName);
349            for (int i = 0; i < reps.getLength(); i++) {
350                parse(segmentObject.getField(fieldNum, i), (Element) reps.item(i));
351            }        
352        }
353    
354        /**
355         * Populates the given Element with data from the given Segment, by inserting
356         * Elements corresponding to the Segment's fields, their components, etc.  Returns 
357         * true if there is at least one data value in the segment.   
358         */
359        public boolean encode(Segment segmentObject, Element segmentElement) throws HL7Exception {
360            boolean hasValue = false;
361            int n = segmentObject.numFields();
362            for (int i = 1; i <= n; i++) {
363                String name = makeElementName(segmentObject, i);
364                Type[] reps = segmentObject.getField(i);
365                for (int j = 0; j < reps.length; j++) {
366                    Element newNode = segmentElement.getOwnerDocument().createElement(name);
367                    boolean componentHasValue = encode(reps[j], newNode);
368                    if (componentHasValue) {
369                        try {
370                            segmentElement.appendChild(newNode);
371                        }
372                        catch (DOMException e) {
373                            throw new HL7Exception(
374                                "DOMException encoding Segment: ",
375                                HL7Exception.APPLICATION_INTERNAL_ERROR,
376                                e);
377                        }
378                        hasValue = true;
379                    }
380                }
381            }
382            return hasValue;
383        }
384    
385        /**
386         * Populates the given Type object with data from the given XML Element.
387         */
388        public void parse(Type datatypeObject, Element datatypeElement) throws DataTypeException {
389            if (datatypeObject instanceof Varies) {
390                parseVaries((Varies) datatypeObject, datatypeElement);
391            }
392            else if (datatypeObject instanceof Primitive) {
393                parsePrimitive((Primitive) datatypeObject, datatypeElement);
394            }
395            else if (datatypeObject instanceof Composite) {
396                parseComposite((Composite) datatypeObject, datatypeElement);
397            }
398        }
399    
400        /**
401         * Parses an XML element into a Varies by determining whether the element is primitive or 
402         * composite, calling setData() on the Varies with a new generic primitive or composite as appropriate, 
403         * and then calling parse again with the new Type object.  
404         */
405        private void parseVaries(Varies datatypeObject, Element datatypeElement) throws DataTypeException {
406            //figure out what data type it holds 
407            //short nodeType = datatypeElement.getFirstChild().getNodeType();        
408            if (!hasChildElement(datatypeElement)) {
409                //it's a primitive 
410                datatypeObject.setData(new GenericPrimitive(datatypeObject.getMessage()));
411            }
412            else {
413                //it's a composite ... almost know what type, except that we don't have the version here 
414                datatypeObject.setData(new GenericComposite(datatypeObject.getMessage()));
415            }
416            parse(datatypeObject.getData(), datatypeElement);
417        }
418    
419        /** Returns true if any of the given element's children are elements */
420        private boolean hasChildElement(Element e) {
421            NodeList children = e.getChildNodes();
422            boolean hasElement = false;
423            int c = 0;
424            while (c < children.getLength() && !hasElement) {
425                if (children.item(c).getNodeType() == Node.ELEMENT_NODE) {
426                    hasElement = true;
427                }
428                c++;
429            }
430            return hasElement;
431        }
432    
433        /** Parses a primitive type by filling it with text child, if any */
434        private void parsePrimitive(Primitive datatypeObject, Element datatypeElement) throws DataTypeException {
435            NodeList children = datatypeElement.getChildNodes();
436            int c = 0;
437            boolean full = false;
438            while (c < children.getLength() && !full) {
439                Node child = children.item(c++);
440                if (child.getNodeType() == Node.TEXT_NODE) {
441                    try {
442                        if (child.getNodeValue() != null && !child.getNodeValue().equals("")) {
443                            if (keepAsOriginal(child.getParentNode())) {
444                                datatypeObject.setValue(child.getNodeValue());
445                            }
446                            else {
447                                datatypeObject.setValue(removeWhitespace(child.getNodeValue()));
448                            }
449                        }
450                    }
451                    catch (DOMException e) {
452                        log.error("Error parsing primitive value from TEXT_NODE", e);
453                    }
454                    full = true;
455                }
456            }
457        }
458    
459        /**
460         * Checks if <code>Node</code> content should be kept as original (ie.: whitespaces won't be removed)
461         * 
462         * @param node The target <code>Node</code> 
463         * @return boolean <code>true</code> if whitespaces should not be removed from node content, 
464         *                 <code>false</code> otherwise
465         */
466        protected boolean keepAsOriginal(Node node) {
467            if (node.getNodeName() == null)
468                return false;
469            return concatKeepAsOriginalNodes.indexOf(node.getNodeName()) != -1;
470        }
471    
472        /** 
473         * Removes all unecessary whitespace from the given String (intended to be used with Primitive values).  
474         * This includes leading and trailing whitespace, and repeated space characters.  Carriage returns, 
475         * line feeds, and tabs are replaced with spaces. 
476         */
477        protected String removeWhitespace(String s) {
478            s = s.replace('\r', ' ');
479            s = s.replace('\n', ' ');
480            s = s.replace('\t', ' ');
481    
482            boolean repeatedSpacesExist = true;
483            while (repeatedSpacesExist) {
484                int loc = s.indexOf("  ");
485                if (loc < 0) {
486                    repeatedSpacesExist = false;
487                }
488                else {
489                    StringBuffer buf = new StringBuffer();
490                    buf.append(s.substring(0, loc));
491                    buf.append(" ");
492                    buf.append(s.substring(loc + 2));
493                    s = buf.toString();
494                }
495            }
496            return s.trim();
497        }
498    
499        /**
500         * Populates a Composite type by looping through it's children, finding corresponding 
501         * Elements among the children of the given Element, and calling parse(Type, Element) for
502         * each.
503         */
504        private void parseComposite(Composite datatypeObject, Element datatypeElement) throws DataTypeException {
505            if (datatypeObject instanceof GenericComposite) { //elements won't be named GenericComposite.x
506                NodeList children = datatypeElement.getChildNodes();
507                int compNum = 0;
508                for (int i = 0; i < children.getLength(); i++) {
509                    if (children.item(i).getNodeType() == Node.ELEMENT_NODE) {
510                        Element nextElement = (Element) children.item(i);
511                        String localName = nextElement.getLocalName();
512                        int dotIndex = localName.indexOf(".");
513                        if (dotIndex > -1) {
514                            compNum = Integer.parseInt(localName.substring(dotIndex + 1)) - 1;
515                        } else {
516                            log.debug("Datatype element {} doesn't have a valid numbered name, usgin default index of {}",
517                                            datatypeElement.getLocalName(), compNum);
518                        }
519                        Type nextComponent = datatypeObject.getComponent(compNum);
520                        parse(nextComponent, nextElement);
521                        compNum++;
522                    }
523                }
524            }
525            else {
526                Type[] children = datatypeObject.getComponents();
527                for (int i = 0; i < children.length; i++) {
528                    NodeList matchingElements =
529                        datatypeElement.getElementsByTagName(makeElementName(datatypeObject, i + 1));
530                    if (matchingElements.getLength() > 0) {
531                        parse(children[i], (Element) matchingElements.item(0)); //components don't repeat - use 1st
532                    }
533                }
534            }
535        }
536    
537        /** 
538         * Returns the expected XML element name for the given child of a message constituent 
539         * of the given class (the class should be a Composite or Segment class). 
540         */
541        /*private String makeElementName(Class c, int child) {
542            String longClassName = c.getName();
543            String shortClassName = longClassName.substring(longClassName.lastIndexOf('.') + 1, longClassName.length());
544            if (shortClassName.startsWith("Valid")) {
545                shortClassName = shortClassName.substring(5, shortClassName.length());
546            }
547            return shortClassName + "." + child;
548        }*/
549    
550        /** Returns the expected XML element name for the given child of the given Segment */
551        private String makeElementName(Segment s, int child) {
552            return s.getName() + "." + child;
553        }
554    
555        /** Returns the expected XML element name for the given child of the given Composite */
556        private String makeElementName(Composite composite, int child) {
557            return composite.getName() + "." + child;
558        }
559    
560        /**
561         * Populates the given Element with data from the given Type, by inserting
562         * Elements corresponding to the Type's components and values.  Returns true if 
563         * the given type contains a value (i.e. for Primitives, if getValue() doesn't 
564         * return null, and for Composites, if at least one underlying Primitive doesn't 
565         * return null).
566         */
567        private boolean encode(Type datatypeObject, Element datatypeElement) throws DataTypeException {
568            boolean hasData = false;
569            if (datatypeObject instanceof Varies) {
570                hasData = encodeVaries((Varies) datatypeObject, datatypeElement);
571            }
572            else if (datatypeObject instanceof Primitive) {
573                hasData = encodePrimitive((Primitive) datatypeObject, datatypeElement);
574            }
575            else if (datatypeObject instanceof Composite) {
576                hasData = encodeComposite((Composite) datatypeObject, datatypeElement);
577            }
578            return hasData;
579        }
580    
581        /**
582         * Encodes a Varies type by extracting it's data field and encoding that.  Returns true 
583         * if the data field (or one of its components) contains a value.  
584         */
585        private boolean encodeVaries(Varies datatypeObject, Element datatypeElement) throws DataTypeException {
586            boolean hasData = false;
587            if (datatypeObject.getData() != null) {
588                hasData = encode(datatypeObject.getData(), datatypeElement);
589            }
590            return hasData;
591        }
592    
593        /** 
594         * Encodes a Primitive in XML by adding it's value as a child of the given Element.  
595         * Returns true if the given Primitive contains a value.  
596         */
597        private boolean encodePrimitive(Primitive datatypeObject, Element datatypeElement) throws DataTypeException {
598            boolean hasValue = false;
599            if (datatypeObject.getValue() != null && !datatypeObject.getValue().equals(""))
600                hasValue = true;
601    
602            Text t = datatypeElement.getOwnerDocument().createTextNode(datatypeObject.getValue());
603            if (hasValue) {
604                try {
605                    datatypeElement.appendChild(t);
606                }
607                catch (DOMException e) {
608                    throw new DataTypeException("DOMException encoding Primitive: ", e);
609                }
610            }
611            return hasValue;
612        }
613    
614        /**
615         * Encodes a Composite in XML by looping through it's components, creating new 
616         * children for each of them (with the appropriate names) and populating them by 
617         * calling encode(Type, Element) using these children.  Returns true if at least 
618         * one component contains a value.  
619         */
620        private boolean encodeComposite(Composite datatypeObject, Element datatypeElement) throws DataTypeException {
621            Type[] components = datatypeObject.getComponents();
622            boolean hasValue = false;
623            for (int i = 0; i < components.length; i++) {
624                String name = makeElementName(datatypeObject, i + 1);
625                Element newNode = datatypeElement.getOwnerDocument().createElement(name);
626                boolean componentHasValue = encode(components[i], newNode);
627                if (componentHasValue) {
628                    try {
629                        datatypeElement.appendChild(newNode);
630                    }
631                    catch (DOMException e) {
632                        throw new DataTypeException("DOMException encoding Composite: ", e);
633                    }
634                    hasValue = true;
635                }
636            }
637            return hasValue;
638        }
639    
640        /**
641         * <p>Returns a minimal amount of data from a message string, including only the
642         * data needed to send a response to the remote system.  This includes the
643         * following fields:
644         * <ul><li>field separator</li>
645         * <li>encoding characters</li>
646         * <li>processing ID</li>
647         * <li>message control ID</li></ul>
648         * This method is intended for use when there is an error parsing a message,
649         * (so the Message object is unavailable) but an error message must be sent
650         * back to the remote system including some of the information in the inbound
651         * message.  This method parses only that required information, hopefully
652         * avoiding the condition that caused the original error.</p>
653         */
654        public Segment getCriticalResponseData(String message) throws HL7Exception {
655            String version = getVersion(message);
656            Segment criticalData = Parser.makeControlMSH(version, getFactory());
657    
658            Terser.set(criticalData, 1, 0, 1, 1, parseLeaf(message, "MSH.1", 0));
659            Terser.set(criticalData, 2, 0, 1, 1, parseLeaf(message, "MSH.2", 0));
660            Terser.set(criticalData, 10, 0, 1, 1, parseLeaf(message, "MSH.10", 0));
661            String procID = parseLeaf(message, "MSH.11", 0);
662            if (procID == null || procID.length() == 0) {
663                procID = parseLeaf(message, "PT.1", message.indexOf("MSH.11"));
664                //this field is a composite in later versions
665            }
666            Terser.set(criticalData, 11, 0, 1, 1, procID);
667    
668            return criticalData;
669        }
670    
671        /**
672         * For response messages, returns the value of MSA-2 (the message ID of the message
673         * sent by the sending system).  This value may be needed prior to main message parsing,
674         * so that (particularly in a multi-threaded scenario) the message can be routed to
675         * the thread that sent the request.  We need this information first so that any
676         * parse exceptions are thrown to the correct thread.  Implementers of Parsers should
677         * take care to make the implementation of this method very fast and robust.
678         * Returns null if MSA-2 can not be found (e.g. if the message is not a
679         * response message).  Trims whitespace from around the MSA-2 field.  
680         */
681        public String getAckID(String message) {
682            String ackID = null;
683            try {
684                ackID = parseLeaf(message, "msa.2", 0).trim();
685            }
686            catch (HL7Exception e) { /* OK ... assume it isn't a response message */
687            }
688            return ackID;
689        }
690    
691        public String getVersion(String message) throws HL7Exception {
692            String version = parseLeaf(message, "MSH.12", 0);
693            if (version == null || version.trim().length() == 0) {
694                version = parseLeaf(message, "VID.1", message.indexOf("MSH.12"));
695            }
696            return version;
697        }
698    
699        /**
700         * Attempts to retrieve the value of a leaf tag without using DOM or SAX.  
701         * This method searches the given message string for the given tag name, and returns 
702         * everything after the given tag and before the start of the next tag.  Whitespace
703         * is stripped.  This is intended only for lead nodes, as the value is considered to 
704         * end at the start of the next tag, regardless of whether it is the matching end 
705         * tag or some other nested tag.  
706         * @param message a string message in XML form
707         * @param tagName the name of the XML tag, e.g. "MSA.2"
708         * @param startAt the character location at which to start searching
709         * @throws HL7Exception if the tag can not be found
710         */
711        protected String parseLeaf(String message, String tagName, int startAt) throws HL7Exception {
712            String value = null;
713    
714            int tagStart = message.indexOf("<" + tagName, startAt);
715            if (tagStart < 0)
716                tagStart = message.indexOf("<" + tagName.toUpperCase(), startAt);
717            int valStart = message.indexOf(">", tagStart) + 1;
718            int valEnd = message.indexOf("<", valStart);
719    
720            if (tagStart >= 0 && valEnd >= valStart) {
721                value = message.substring(valStart, valEnd);
722            }
723            else {
724                throw new HL7Exception(
725                    "Couldn't find "
726                        + tagName
727                        + " in message beginning: "
728                        + message.substring(0, Math.min(150, message.length())),
729                    HL7Exception.REQUIRED_FIELD_MISSING);
730            }
731    
732            // Escape codes, as defined at http://hdf.ncsa.uiuc.edu/HDF5/XML/xml_escape_chars.htm
733            value = value.replaceAll("&quot;", "\"");
734            value = value.replaceAll("&apos;", "'");
735            value = value.replaceAll("&amp;", "&");
736            value = value.replaceAll("&lt;", "<");
737            value = value.replaceAll("&gt;", ">");
738    
739            return value;
740        }
741    
742        /**
743         * Throws unsupported operation exception
744         *
745         * @throws Unsupported operation exception
746         */
747        @Override
748        public String doEncode(Segment structure, EncodingCharacters encodingCharacters) throws HL7Exception {
749            throw new UnsupportedOperationException("Not supported yet.");
750        }
751    
752        /**
753         * Throws unsupported operation exception
754         *
755         * @throws Unsupported operation exception
756         */
757        @Override
758            protected Message doParseForSpecificPackage(String theMessage, String theVersion, String thePackageName) throws HL7Exception, EncodingNotSupportedException {
759            throw new UnsupportedOperationException("Not supported yet.");
760            }
761    
762            /**
763         * Throws unsupported operation exception
764         *
765         * @throws Unsupported operation exception
766         */
767        @Override
768        public String doEncode(Type type, EncodingCharacters encodingCharacters) throws HL7Exception {
769            throw new UnsupportedOperationException("Not supported yet.");
770        }
771    
772        /**
773         * Throws unsupported operation exception
774         *
775         * @throws Unsupported operation exception
776         */
777        @Override
778        public void parse(Type type, String string, EncodingCharacters encodingCharacters) throws HL7Exception {
779            throw new UnsupportedOperationException("Not supported yet.");
780        }
781    
782        /**
783         * Throws unsupported operation exception
784         *
785         * @throws Unsupported operation exception
786         */
787        @Override
788        public void parse(Segment segment, String string, EncodingCharacters encodingCharacters) throws HL7Exception {
789            throw new UnsupportedOperationException("Not supported yet.");
790        }
791    
792    
793        /** Test harness */
794        public static void main(String args[]) {
795            if (args.length != 1) {
796                System.out.println("Usage: XMLParser pipe_encoded_file");
797                System.exit(1);
798            }
799    
800            //read and parse message from file 
801            try {
802                PipeParser parser = new PipeParser();
803                File messageFile = new File(args[0]);
804                long fileLength = messageFile.length();
805                FileReader r = new FileReader(messageFile);
806                char[] cbuf = new char[(int) fileLength];
807                System.out.println("Reading message file ... " + r.read(cbuf) + " of " + fileLength + " chars");
808                r.close();
809                String messString = String.valueOf(cbuf);
810                Message mess = parser.parse(messString);
811                System.out.println("Got message of type " + mess.getClass().getName());
812    
813                ca.uhn.hl7v2.parser.XMLParser xp = new XMLParser() {
814                    public Message parseDocument(Document XMLMessage, String version) throws HL7Exception {
815                        return null;
816                    }
817                    public Document encodeDocument(Message source) throws HL7Exception {
818                        return null;
819                    }
820                    public String getVersion(String message) throws HL7Exception {
821                        return null;
822                    }
823    
824                    @Override
825                    public void parse(Message message, String string) throws HL7Exception {
826                        throw new UnsupportedOperationException("Not supported yet.");
827                    }
828    
829                    @Override
830                    public String doEncode(Segment structure, EncodingCharacters encodingCharacters) throws HL7Exception {
831                        throw new UnsupportedOperationException("Not supported yet.");
832                    }
833    
834                    @Override
835                    public String doEncode(Type type, EncodingCharacters encodingCharacters) throws HL7Exception {
836                        throw new UnsupportedOperationException("Not supported yet.");
837                    }
838    
839                    @Override
840                    public void parse(Type type, String string, EncodingCharacters encodingCharacters) throws HL7Exception {
841                        throw new UnsupportedOperationException("Not supported yet.");
842                    }
843    
844                    @Override
845                    public void parse(Segment segment, String string, EncodingCharacters encodingCharacters) throws HL7Exception {
846                        throw new UnsupportedOperationException("Not supported yet.");
847                    }
848                                    @Override
849                                    protected Message doParseForSpecificPackage(String theMessage, String theVersion, String thePackageName) throws HL7Exception, EncodingNotSupportedException {
850                        throw new UnsupportedOperationException("Not supported yet.");
851                                    }
852                };
853    
854                //loop through segment children of message, encode, print to console
855                String[] structNames = mess.getNames();
856                for (int i = 0; i < structNames.length; i++) {
857                    Structure[] reps = mess.getAll(structNames[i]);
858                    for (int j = 0; j < reps.length; j++) {
859                        if (Segment.class.isAssignableFrom(reps[j].getClass())) { //ignore groups
860                            DocumentBuilder docBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
861                            Document doc = docBuilder.newDocument(); //new doc for each segment
862                            Element root = doc.createElement(reps[j].getClass().getName());
863                            doc.appendChild(root);
864                            xp.encode((Segment) reps[j], root);
865                            StringWriter out = new StringWriter();
866                            XMLSerializer ser = new XMLSerializer(out, null); //default output format
867                            ser.serialize(doc);
868                            System.out.println("Segment " + reps[j].getClass().getName() + ": \r\n" + out.toString());
869    
870                            Class<?>[] segmentConstructTypes = { Message.class };
871                            Object[] segmentConstructArgs = { null };
872                            Segment s =
873                                (Segment) reps[j].getClass().getConstructor(segmentConstructTypes).newInstance(
874                                    segmentConstructArgs);
875                            xp.parse(s, root);
876                            Document doc2 = docBuilder.newDocument();
877                            Element root2 = doc2.createElement(s.getClass().getName());
878                            doc2.appendChild(root2);
879                            xp.encode(s, root2);
880                            StringWriter out2 = new StringWriter();
881                            ser = new XMLSerializer(out2, null); //default output format
882                            ser.serialize(doc2);
883                            if (out2.toString().equals(out.toString())) {
884                                System.out.println("Re-encode OK");
885                            }
886                            else {
887                                System.out.println(
888                                    "Warning: XML different after parse and re-encode: \r\n" + out2.toString());
889                            }
890                        }
891                    }
892                }
893    
894            }
895            catch (Exception e) {
896                e.printStackTrace();
897            }
898        }
899    
900        /**
901         * Returns the text encoding to be used in generating new messages. Note that this affects encoding to string only, not parsing.
902         * @return
903         */
904            public String getTextEncoding() {
905                    return textEncoding;
906            }
907    
908            /**
909             * Sets the text encoding to be used in generating new messages. Note that this affects encoding to string only, not parsing.
910             * @param textEncoding The encoding. Default is the platform default.
911             */
912            public void setTextEncoding(String textEncoding) {
913                    this.textEncoding = textEncoding;
914            }
915    
916    }