001/**
002The contents of this file are subject to the Mozilla Public License Version 1.1
003(the "License"); you may not use this file except in compliance with the License.
004You may obtain a copy of the License at http://www.mozilla.org/MPL/
005Software distributed under the License is distributed on an "AS IS" basis,
006WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the
007specific language governing rights and limitations under the License.
008
009The Initial Developer of the Original Code is University Health Network. Copyright (C)
0102001.  All Rights Reserved.
011
012Contributor(s): ______________________________________.
013
014Alternatively, the contents of this file may be used under the terms of the
015GNU General Public License (the "GPL"), in which case the provisions of the GPL are
016applicable instead of those above.  If you wish to allow use of your version of this
017file only under the terms of the GPL and not to allow others to use your version
018of this file under the MPL, indicate your decision by deleting  the provisions above
019and replace  them with the notice and other provisions required by the GPL License.
020If you do not delete the provisions above, a recipient may use your version of
021this file under either the MPL or the GPL.
022
023*/
024package ca.uhn.hl7v2.parser;
025
026import java.io.File;
027import java.io.FileReader;
028import java.util.ArrayList;
029import java.util.HashSet;
030import java.util.List;
031import java.util.Set;
032
033import javax.xml.parsers.DocumentBuilderFactory;
034
035import org.slf4j.Logger;
036import org.slf4j.LoggerFactory;
037import org.w3c.dom.DOMException;
038import org.w3c.dom.Document;
039import org.w3c.dom.Element;
040import org.w3c.dom.Node;
041import org.w3c.dom.NodeList;
042
043import ca.uhn.hl7v2.HL7Exception;
044import ca.uhn.hl7v2.model.Group;
045import ca.uhn.hl7v2.model.Message;
046import ca.uhn.hl7v2.model.Segment;
047import ca.uhn.hl7v2.model.Structure;
048
049/**
050 * <p>A default XMLParser.  This class assigns segment elements (in an XML-encoded message) 
051 * to Segment objects (in a Message object) using the name of a segment and the names 
052 * of any groups in which the segment is nested.  The names of group classes must correspond
053 * to the names of group elements (they must be identical except that a dot in the element 
054 * name, following the message name, is replaced with an underscore, in order to constitute a
055 * valid class name). </p>
056 * <p>At the time of writing, the group names in the XML spec are changing.  Many of the group 
057 * names have been automatically generated based on the group contents.  However, these automatic 
058 * names are gradually being replaced with manually assigned names.  This process is expected to 
059 * be complete by November 2002.  As a result, mismatches are likely.  Messages could be  
060 * transformed prior to parsing (using XSLT) as a work-around.  Alternatively the group class names 
061 * could be changed to reflect updates in the XML spec.  Ultimately, HAPI group classes will be 
062 * changed to correspond with the official group names, once these are all assigned.  </p>
063 * @author Bryan Tripp
064 */
065public class DefaultXMLParser extends XMLParser {
066
067    private static final Logger log = LoggerFactory.getLogger(DefaultXMLParser.class);
068
069    private static final Set<String> ourForceGroupNames;
070    
071    static {
072        ourForceGroupNames = new HashSet<String>();
073        ourForceGroupNames.add("DIET");
074    }
075    
076    /** Creates a new instance of DefaultXMLParser */
077    public DefaultXMLParser() {
078        super();
079    }
080
081    /** 
082     * Creates a new instance of DefaultXMLParser 
083     *  
084     * @param theFactory custom factory to use for model class lookup 
085     */
086    public DefaultXMLParser(ModelClassFactory theFactory) {
087        super(theFactory);
088    }
089    
090    /**
091     * <p>Creates an XML Document that corresponds to the given Message object. </p>
092     * <p>If you are implementing this method, you should create an XML Document, and insert XML Elements
093     * into it that correspond to the groups and segments that belong to the message type that your subclass
094     * of XMLParser supports.  Then, for each segment in the message, call the method
095     * <code>encode(Segment segmentObject, Element segmentElement)</code> using the Element for
096     * that segment and the corresponding Segment object from the given Message.</p>
097     */
098    public Document encodeDocument(Message source) throws HL7Exception {
099        String messageClassName = source.getClass().getName();
100        String messageName = messageClassName.substring(messageClassName.lastIndexOf('.') + 1);
101        org.w3c.dom.Document doc = null;
102        try {
103            doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
104            Element root = doc.createElement(messageName);
105            doc.appendChild(root);
106        }
107        catch (Exception e) {
108            throw new HL7Exception(
109                "Can't create XML document - " + e.getClass().getName(),
110                HL7Exception.APPLICATION_INTERNAL_ERROR,
111                e);
112        }
113        encode(source, doc.getDocumentElement());
114        return doc;
115    }
116
117    /**
118     * Copies data from a group object into the corresponding group element, creating any 
119     * necessary child nodes.  
120     */
121    private void encode(ca.uhn.hl7v2.model.Group groupObject, org.w3c.dom.Element groupElement) throws HL7Exception {
122        String[] childNames = groupObject.getNames();
123        String messageName = groupObject.getMessage().getName();
124        
125        try {
126            for (int i = 0; i < childNames.length; i++) {
127                Structure[] reps = groupObject.getAll(childNames[i]);
128                for (int j = 0; j < reps.length; j++) {
129                    String elementName = makeGroupElementName(messageName, childNames[i]);
130                                        Element childElement;
131                                        try {
132                                                childElement = groupElement.getOwnerDocument().createElement(elementName);
133                                } catch (DOMException e) {
134                                    throw new HL7Exception(
135                                        "Can't encode element " + elementName + " in group " + groupObject.getClass().getName(),
136                                        HL7Exception.APPLICATION_INTERNAL_ERROR,
137                                        e);
138                                }
139                    groupElement.appendChild(childElement);
140                    if (reps[j] instanceof Group) {
141                        encode((Group) reps[j], childElement);
142                    }
143                    else if (reps[j] instanceof Segment) {
144                        encode((Segment) reps[j], childElement);
145                    }
146                }
147            }
148        } catch (DOMException e) {
149            throw new HL7Exception(
150                "Can't encode group " + groupObject.getClass().getName(),
151                HL7Exception.APPLICATION_INTERNAL_ERROR,
152                e);
153        }
154    }
155
156    /**
157     * <p>Creates and populates a Message object from an XML Document that contains an XML-encoded HL7 message.</p>
158     * <p>The easiest way to implement this method for a particular message structure is as follows:
159     * <ol><li>Create an instance of the Message type you are going to handle with your subclass
160     * of XMLParser</li>
161     * <li>Go through the given Document and find the Elements that represent the top level of
162     * each message segment. </li>
163     * <li>For each of these segments, call <code>parse(Segment segmentObject, Element segmentElement)</code>,
164     * providing the appropriate Segment from your Message object, and the corresponding Element.</li></ol>
165     * At the end of this process, your Message object should be populated with data from the XML
166     * Document.</p>
167     * @throws HL7Exception if the message is not correctly formatted.
168     * @throws EncodingNotSupportedException if the message encoded
169     *     is not supported by this parser.
170     */
171    public Message parseDocument(org.w3c.dom.Document XMLMessage, String version) throws HL7Exception {
172        String messageName = XMLMessage.getDocumentElement().getTagName();
173        Message message = instantiateMessage(messageName, version, true);
174        parse(message, XMLMessage.getDocumentElement());
175        return message;
176    }
177
178    /**
179     * Populates the given group object with data from the given group element, ignoring 
180     * any unrecognized nodes.  
181     */
182    private void parse(ca.uhn.hl7v2.model.Group groupObject, org.w3c.dom.Element groupElement) throws HL7Exception {
183        String[] childNames = groupObject.getNames();
184        String messageName = groupObject.getMessage().getName();
185        
186        NodeList allChildNodes = groupElement.getChildNodes();
187        List<String> unparsedElementList = new ArrayList<String>();
188        for (int i = 0; i < allChildNodes.getLength(); i++) {
189            Node node = allChildNodes.item(i);
190            String name = node.getNodeName();
191            if (node.getNodeType() == Node.ELEMENT_NODE && !unparsedElementList.contains(name)) {
192                unparsedElementList.add(name);                
193            }
194        }
195        
196        //we're not too fussy about order here (all occurrences get parsed as repetitions) ... 
197        for (int i = 0; i < childNames.length; i++) {
198            unparsedElementList.remove(childNames[i]);
199            
200            // 4 char segment names are second occurrences of a segment within a single message
201            // structure. e.g. the second PID segment in an A17 patient swap message is known
202            // to hapi's code represenation as PID2
203            if (childNames[i].length() != 4) {   
204                parseReps(groupElement, groupObject, messageName, childNames[i], childNames[i]);
205            } else {
206                log.debug("Skipping rep segment: {}", childNames[i]);
207            }
208        }
209        
210        for (int i = 0; i < unparsedElementList.size(); i++) {
211            String segName = (String) unparsedElementList.get(i);            
212            String segIndexName = groupObject.addNonstandardSegment(segName);
213            parseReps(groupElement, groupObject, messageName, segName, segIndexName);
214        }
215    }
216    
217    //param childIndexName may have an integer on the end if >1 sibling with same name (e.g. NTE2) 
218    private void parseReps(Element groupElement, Group groupObject, 
219            String messageName, String childName, String childIndexName) throws HL7Exception {
220        
221        List<Element> reps = getChildElementsByTagName(groupElement, makeGroupElementName(messageName, childName));
222        log.debug("# of elements matching {}: {}", 
223                        makeGroupElementName(messageName, childName), reps.size());
224
225                if (groupObject.isRepeating(childIndexName)) {
226                        for (int i = 0; i < reps.size(); i++) {
227                                parseRep(reps.get(i), groupObject.get(childIndexName, i));
228                        }                                       
229                } else {
230                        if (reps.size() > 0) {
231                                parseRep(reps.get(0), groupObject.get(childIndexName, 0));                              
232                        }
233
234//                      if (reps.size() > 1) {                       
235//                              String newIndexName = groupObject.addNonstandardSegment(childName);                     
236//                              for (int i = 1; i < reps.size(); i++) {
237//                                      parseRep((Element) reps.get(i), groupObject.get(newIndexName, i-1));
238//                              }                                                               
239//                      }
240                        if (reps.size() > 1) {
241                                String newIndexName = "";
242                                int i=1;
243                                try     {
244                                        for (i = 1; i < reps.size(); i++) {
245                                                newIndexName = childName+(i+1);
246                                                Structure st = groupObject.get(newIndexName);
247                                                parseRep(reps.get(i), st);
248                                        }
249                                } catch(Throwable t) {
250                                        log.info("Issue Parsing: " + t);
251                                        newIndexName = groupObject.addNonstandardSegment(childName);
252                                        for (int j = i; j < reps.size(); j++) {
253                                                parseRep(reps.get(j), groupObject.get(newIndexName, j-i));
254                                        }
255                                }
256                        }
257                        
258                }
259    }
260    
261    private void parseRep(Element theElem, Structure theObj) throws HL7Exception {
262                if (theObj instanceof Group) {
263                        parse((Group) theObj, theElem);
264                }
265                else if (theObj instanceof Segment) {
266                        parse((Segment) theObj, theElem);
267                }                
268                log.debug("Parsed element: {}", theElem.getNodeName());         
269    }
270    
271    //includes direct children only
272    private List<Element> getChildElementsByTagName(Element theElement, String theName) {
273        List<Element> result = new ArrayList<Element>(10);
274        NodeList children = theElement.getChildNodes();
275        
276        for (int i = 0; i < children.getLength(); i++) {
277                Node child = children.item(i);
278                if (child.getNodeType() == Node.ELEMENT_NODE && child.getNodeName().equals(theName)) {
279                        result.add((Element)child);
280                }
281        }
282        
283        return result; 
284    }
285    
286    /** 
287     * Given the name of a group element in an XML message, returns the corresponding 
288     * group class name.  This name is identical except in order to be a valid class 
289     * name, the dot character immediately following the message name is replaced with 
290     * an underscore.  For example, there is a group element called ADT_A01.INSURANCE and the 
291     * corresponding group Class is called ADT_A01_INSURANCE. 
292     */
293//    protected static String makeGroupClassName(String elementName) {
294//        return elementName.replace('.', '_');
295//    }
296
297    /** 
298     * Given the name of a message and a Group class, returns the corresponding group element name in an 
299     * XML-encoded message.  This is the message name and group name separated by a dot. For example, 
300     * ADT_A01.INSURANCE.
301     * 
302     * If it looks like a segment name (i.e. has 3 characters), no change is made. 
303     */
304    protected static String makeGroupElementName(String messageName, String className) {
305        String ret = null;
306        
307        if (className.length() > 4 || ourForceGroupNames.contains(className)) {
308            StringBuilder elementName = new StringBuilder();
309            elementName.append(messageName);
310            elementName.append('.');
311            elementName.append(className);
312            ret = elementName.toString();
313        } else if (className.length() == 4) {
314            // It is not clear why this case is needed.. We should figure out
315                // why it was added, since removing it or optimizing its use would
316                // prevent the need for "ourForGroupNames" above
317                ret = className.substring(0,3);
318        } else {
319            ret = className;
320        }
321        
322        return ret;
323    }
324
325    /** Test harness */
326    public static void main(String args[]) {
327        if (args.length != 1) {
328            System.out.println("Usage: DefaultXMLParser pipe_encoded_file");
329            System.exit(1);
330        }
331
332        //read and parse message from file 
333        try {
334            File messageFile = new File(args[0]);
335            long fileLength = messageFile.length();
336            FileReader r = new FileReader(messageFile);
337            char[] cbuf = new char[(int) fileLength];
338            System.out.println("Reading message file ... " + r.read(cbuf) + " of " + fileLength + " chars");
339            r.close();
340            String messString = String.valueOf(cbuf);
341
342            Parser inParser = null;
343            Parser outParser = null;
344            PipeParser pp = new PipeParser();
345            ca.uhn.hl7v2.parser.XMLParser xp = new DefaultXMLParser();
346            System.out.println("Encoding: " + pp.getEncoding(messString));
347            if (pp.getEncoding(messString) != null) {
348                inParser = pp;
349                outParser = xp;
350            }
351            else if (xp.getEncoding(messString) != null) {
352                inParser = xp;
353                outParser = pp;
354            }
355
356            Message mess = inParser.parse(messString);
357            System.out.println("Got message of type " + mess.getClass().getName());
358
359            String otherEncoding = outParser.encode(mess);
360            System.out.println(otherEncoding);
361        }
362        catch (Exception e) {
363            e.printStackTrace();
364        }
365    }
366
367    /**
368     * {@inheritDoc}
369     */
370        @Override
371        public void parse(Message theMessage, String theString) throws HL7Exception {
372                Document doc = parseStringIntoDocument(theString);
373        parse(theMessage, doc.getDocumentElement());
374        }
375
376
377}