001    /**
002    The contents of this file are subject to the Mozilla Public License Version 1.1
003    (the "License"); you may not use this file except in compliance with the License.
004    You may obtain a copy of the License at http://www.mozilla.org/MPL/
005    Software distributed under the License is distributed on an "AS IS" basis,
006    WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the
007    specific language governing rights and limitations under the License.
008    
009    The Initial Developer of the Original Code is University Health Network. Copyright (C)
010    2001.  All Rights Reserved.
011    
012    Contributor(s): ______________________________________.
013    
014    Alternatively, the contents of this file may be used under the terms of the
015    GNU General Public License (the "GPL"), in which case the provisions of the GPL are
016    applicable instead of those above.  If you wish to allow use of your version of this
017    file only under the terms of the GPL and not to allow others to use your version
018    of this file under the MPL, indicate your decision by deleting  the provisions above
019    and replace  them with the notice and other provisions required by the GPL License.
020    If you do not delete the provisions above, a recipient may use your version of
021    this file under either the MPL or the GPL.
022    
023    */
024    package ca.uhn.hl7v2.parser;
025    
026    import java.io.File;
027    import java.io.FileReader;
028    import java.util.ArrayList;
029    import java.util.HashSet;
030    import java.util.List;
031    import java.util.Set;
032    
033    import javax.xml.parsers.DocumentBuilderFactory;
034    
035    import org.slf4j.Logger;
036    import org.slf4j.LoggerFactory;
037    import org.w3c.dom.DOMException;
038    import org.w3c.dom.Document;
039    import org.w3c.dom.Element;
040    import org.w3c.dom.Node;
041    import org.w3c.dom.NodeList;
042    
043    import ca.uhn.hl7v2.HL7Exception;
044    import ca.uhn.hl7v2.model.Group;
045    import ca.uhn.hl7v2.model.Message;
046    import ca.uhn.hl7v2.model.Segment;
047    import ca.uhn.hl7v2.model.Structure;
048    
049    /**
050     * <p>A default XMLParser.  This class assigns segment elements (in an XML-encoded message) 
051     * to Segment objects (in a Message object) using the name of a segment and the names 
052     * of any groups in which the segment is nested.  The names of group classes must correspond
053     * to the names of group elements (they must be identical except that a dot in the element 
054     * name, following the message name, is replaced with an underscore, in order to consitute a 
055     * valid class name). </p>
056     * <p>At the time of writing, the group names in the XML spec are changing.  Many of the group 
057     * names have been automatically generated based on the group contents.  However, these automatic 
058     * names are gradually being replaced with manually assigned names.  This process is expected to 
059     * be complete by November 2002.  As a result, mismatches are likely.  Messages could be  
060     * transformed prior to parsing (using XSLT) as a work-around.  Alternatively the group class names 
061     * could be changed to reflect updates in the XML spec.  Ultimately, HAPI group classes will be 
062     * changed to correspond with the official group names, once these are all assigned.  </p>
063     * @author Bryan Tripp
064     */
065    public class DefaultXMLParser extends XMLParser {
066    
067        private static final Logger log = LoggerFactory.getLogger(DefaultXMLParser.class);
068    
069        private static final Set<String> ourForceGroupNames;
070        
071        static {
072            ourForceGroupNames = new HashSet<String>();
073            ourForceGroupNames.add("DIET");
074        }
075        
076        /** Creates a new instance of DefaultXMLParser */
077        public DefaultXMLParser() {
078            super();
079        }
080    
081        /** 
082         * Creates a new instance of DefaultXMLParser 
083         *  
084         * @param theFactory custom factory to use for model class lookup 
085         */
086        public DefaultXMLParser(ModelClassFactory theFactory) {
087            super(theFactory);
088        }
089        
090        /**
091         * <p>Creates an XML Document that corresponds to the given Message object. </p>
092         * <p>If you are implementing this method, you should create an XML Document, and insert XML Elements
093         * into it that correspond to the groups and segments that belong to the message type that your subclass
094         * of XMLParser supports.  Then, for each segment in the message, call the method
095         * <code>encode(Segment segmentObject, Element segmentElement)</code> using the Element for
096         * that segment and the corresponding Segment object from the given Message.</p>
097         */
098        public Document encodeDocument(Message source) throws HL7Exception {
099            String messageClassName = source.getClass().getName();
100            String messageName = messageClassName.substring(messageClassName.lastIndexOf('.') + 1);
101            org.w3c.dom.Document doc = null;
102            try {
103                doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
104                Element root = doc.createElement(messageName);
105                doc.appendChild(root);
106            }
107            catch (Exception e) {
108                throw new HL7Exception(
109                    "Can't create XML document - " + e.getClass().getName(),
110                    HL7Exception.APPLICATION_INTERNAL_ERROR,
111                    e);
112            }
113            encode(source, doc.getDocumentElement());
114            return doc;
115        }
116    
117        /**
118         * Copies data from a group object into the corresponding group element, creating any 
119         * necessary child nodes.  
120         */
121        private void encode(ca.uhn.hl7v2.model.Group groupObject, org.w3c.dom.Element groupElement) throws HL7Exception {
122            String[] childNames = groupObject.getNames();
123            String messageName = groupObject.getMessage().getName();
124            
125            try {
126                for (int i = 0; i < childNames.length; i++) {
127                    Structure[] reps = groupObject.getAll(childNames[i]);
128                    for (int j = 0; j < reps.length; j++) {
129                        String elementName = makeGroupElementName(messageName, childNames[i]);
130                                            Element childElement;
131                                            try {
132                                                    childElement = groupElement.getOwnerDocument().createElement(elementName);
133                                    } catch (DOMException e) {
134                                        throw new HL7Exception(
135                                            "Can't encode element " + elementName + " in group " + groupObject.getClass().getName(),
136                                            HL7Exception.APPLICATION_INTERNAL_ERROR,
137                                            e);
138                                    }
139                        groupElement.appendChild(childElement);
140                        if (reps[j] instanceof Group) {
141                            encode((Group) reps[j], childElement);
142                        }
143                        else if (reps[j] instanceof Segment) {
144                            encode((Segment) reps[j], childElement);
145                        }
146                    }
147                }
148            } catch (DOMException e) {
149                throw new HL7Exception(
150                    "Can't encode group " + groupObject.getClass().getName(),
151                    HL7Exception.APPLICATION_INTERNAL_ERROR,
152                    e);
153            }
154        }
155    
156        /**
157         * <p>Creates and populates a Message object from an XML Document that contains an XML-encoded HL7 message.</p>
158         * <p>The easiest way to implement this method for a particular message structure is as follows:
159         * <ol><li>Create an instance of the Message type you are going to handle with your subclass
160         * of XMLParser</li>
161         * <li>Go through the given Document and find the Elements that represent the top level of
162         * each message segment. </li>
163         * <li>For each of these segments, call <code>parse(Segment segmentObject, Element segmentElement)</code>,
164         * providing the appropriate Segment from your Message object, and the corresponding Element.</li></ol>
165         * At the end of this process, your Message object should be populated with data from the XML
166         * Document.</p>
167         * @throws HL7Exception if the message is not correctly formatted.
168         * @throws EncodingNotSupportedException if the message encoded
169         *     is not supported by this parser.
170         */
171        public Message parseDocument(org.w3c.dom.Document XMLMessage, String version) throws HL7Exception {
172            String messageName = XMLMessage.getDocumentElement().getTagName();
173            Message message = instantiateMessage(messageName, version, true);
174            parse(message, XMLMessage.getDocumentElement());
175            return message;
176        }
177    
178        /**
179         * Populates the given group object with data from the given group element, ignoring 
180         * any unrecognized nodes.  
181         */
182        private void parse(ca.uhn.hl7v2.model.Group groupObject, org.w3c.dom.Element groupElement) throws HL7Exception {
183            String[] childNames = groupObject.getNames();
184            String messageName = groupObject.getMessage().getName();
185            
186            NodeList allChildNodes = groupElement.getChildNodes();
187            List<String> unparsedElementList = new ArrayList<String>();
188            for (int i = 0; i < allChildNodes.getLength(); i++) {
189                Node node = allChildNodes.item(i);
190                String name = node.getNodeName();
191                if (node.getNodeType() == Node.ELEMENT_NODE && !unparsedElementList.contains(name)) {
192                    unparsedElementList.add(name);                
193                }
194            }
195            
196            //we're not too fussy about order here (all occurrences get parsed as repetitions) ... 
197            for (int i = 0; i < childNames.length; i++) {
198                unparsedElementList.remove(childNames[i]);
199                
200                // 4 char segment names are second occurrences of a segment within a single message
201                // structure. e.g. the second PID segment in an A17 patient swap message is known
202                // to hapi's code represenation as PID2
203                if (childNames[i].length() != 4) {   
204                    parseReps(groupElement, groupObject, messageName, childNames[i], childNames[i]);
205                } else {
206                    log.debug("Skipping rep segment: {}", childNames[i]);
207                }
208            }
209            
210            for (int i = 0; i < unparsedElementList.size(); i++) {
211                String segName = (String) unparsedElementList.get(i);            
212                String segIndexName = groupObject.addNonstandardSegment(segName);
213                parseReps(groupElement, groupObject, messageName, segName, segIndexName);
214            }
215        }
216        
217        //param childIndexName may have an integer on the end if >1 sibling with same name (e.g. NTE2) 
218        private void parseReps(Element groupElement, Group groupObject, 
219                String messageName, String childName, String childIndexName) throws HL7Exception {
220            
221            List<Element> reps = getChildElementsByTagName(groupElement, makeGroupElementName(messageName, childName));
222            log.debug("# of elements matching {}: {}", 
223                            makeGroupElementName(messageName, childName), reps.size());
224    
225                    if (groupObject.isRepeating(childIndexName)) {
226                            for (int i = 0; i < reps.size(); i++) {
227                                    parseRep(reps.get(i), groupObject.get(childIndexName, i));
228                            }                                       
229                    } else {
230                            if (reps.size() > 0) {
231                                    parseRep(reps.get(0), groupObject.get(childIndexName, 0));                              
232                            }
233    
234    //                      if (reps.size() > 1) {                       
235    //                              String newIndexName = groupObject.addNonstandardSegment(childName);                     
236    //                              for (int i = 1; i < reps.size(); i++) {
237    //                                      parseRep((Element) reps.get(i), groupObject.get(newIndexName, i-1));
238    //                              }                                                               
239    //                      }
240                            if (reps.size() > 1) {
241                                    String newIndexName = "";
242                                    int i=1;
243                                    try     {
244                                            for (i = 1; i < reps.size(); i++) {
245                                                    newIndexName = childName+(i+1);
246                                                    Structure st = groupObject.get(newIndexName);
247                                                    parseRep(reps.get(i), st);
248                                            }
249                                    } catch(Throwable t) {
250                                            log.info("Issue Parsing: " + t);
251                                            newIndexName = groupObject.addNonstandardSegment(childName);
252                                            for (int j = i; j < reps.size(); j++) {
253                                                    parseRep(reps.get(j), groupObject.get(newIndexName, j-i));
254                                            }
255                                    }
256                            }
257                            
258                    }
259        }
260        
261        private void parseRep(Element theElem, Structure theObj) throws HL7Exception {
262                    if (theObj instanceof Group) {
263                            parse((Group) theObj, theElem);
264                    }
265                    else if (theObj instanceof Segment) {
266                            parse((Segment) theObj, theElem);
267                    }                
268                    log.debug("Parsed element: {}", theElem.getNodeName());         
269        }
270        
271        //includes direct children only
272        private List<Element> getChildElementsByTagName(Element theElement, String theName) {
273            List<Element> result = new ArrayList<Element>(10);
274            NodeList children = theElement.getChildNodes();
275            
276            for (int i = 0; i < children.getLength(); i++) {
277                    Node child = children.item(i);
278                    if (child.getNodeType() == Node.ELEMENT_NODE && child.getNodeName().equals(theName)) {
279                            result.add((Element)child);
280                    }
281            }
282            
283            return result; 
284        }
285        
286        /** 
287         * Given the name of a group element in an XML message, returns the corresponding 
288         * group class name.  This name is identical except in order to be a valid class 
289         * name, the dot character immediately following the message name is replaced with 
290         * an underscore.  For example, there is a group element called ADT_A01.INSURANCE and the 
291         * corresponding group Class is called ADT_A01_INSURANCE. 
292         */
293    //    protected static String makeGroupClassName(String elementName) {
294    //        return elementName.replace('.', '_');
295    //    }
296    
297        /** 
298         * Given the name of a message and a Group class, returns the corresponding group element name in an 
299         * XML-encoded message.  This is the message name and group name separated by a dot. For example, 
300         * ADT_A01.INSURANCE.
301         * 
302         * If it looks like a segment name (i.e. has 3 characters), no change is made. 
303         */
304        protected static String makeGroupElementName(String messageName, String className) {
305            String ret = null;
306            
307            if (className.length() > 4 || ourForceGroupNames.contains(className)) {
308                StringBuilder elementName = new StringBuilder();
309                elementName.append(messageName);
310                elementName.append('.');
311                elementName.append(className);
312                ret = elementName.toString();
313            } else if (className.length() == 4) {
314                // It is not clear why this case is needed.. We should figure out
315                    // why it was added, since removing it or optimizing its use would
316                    // prevent the need for "ourForGroupNames" above
317                    ret = className.substring(0,3);
318            } else {
319                ret = className;
320            }
321            
322            return ret;
323        }
324    
325        /** Test harness */
326        public static void main(String args[]) {
327            if (args.length != 1) {
328                System.out.println("Usage: DefaultXMLParser pipe_encoded_file");
329                System.exit(1);
330            }
331    
332            //read and parse message from file 
333            try {
334                File messageFile = new File(args[0]);
335                long fileLength = messageFile.length();
336                FileReader r = new FileReader(messageFile);
337                char[] cbuf = new char[(int) fileLength];
338                System.out.println("Reading message file ... " + r.read(cbuf) + " of " + fileLength + " chars");
339                r.close();
340                String messString = String.valueOf(cbuf);
341    
342                Parser inParser = null;
343                Parser outParser = null;
344                PipeParser pp = new PipeParser();
345                ca.uhn.hl7v2.parser.XMLParser xp = new DefaultXMLParser();
346                System.out.println("Encoding: " + pp.getEncoding(messString));
347                if (pp.getEncoding(messString) != null) {
348                    inParser = pp;
349                    outParser = xp;
350                }
351                else if (xp.getEncoding(messString) != null) {
352                    inParser = xp;
353                    outParser = pp;
354                }
355    
356                Message mess = inParser.parse(messString);
357                System.out.println("Got message of type " + mess.getClass().getName());
358    
359                String otherEncoding = outParser.encode(mess);
360                System.out.println(otherEncoding);
361            }
362            catch (Exception e) {
363                e.printStackTrace();
364            }
365        }
366    
367        /**
368         * {@inheritDoc}
369         */
370            @Override
371            public void parse(Message theMessage, String theString) throws HL7Exception {
372                    Document doc = parseStringIntoDocument(theString);
373            parse(theMessage, doc.getDocumentElement());
374            }
375    
376    
377    }