001 /**
002 The contents of this file are subject to the Mozilla Public License Version 1.1
003 (the "License"); you may not use this file except in compliance with the License.
004 You may obtain a copy of the License at http://www.mozilla.org/MPL/
005 Software distributed under the License is distributed on an "AS IS" basis,
006 WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the
007 specific language governing rights and limitations under the License.
008
009 The Initial Developer of the Original Code is University Health Network. Copyright (C)
010 2001. All Rights Reserved.
011
012 Contributor(s): ______________________________________.
013
014 Alternatively, the contents of this file may be used under the terms of the
015 GNU General Public License (the "GPL"), in which case the provisions of the GPL are
016 applicable instead of those above. If you wish to allow use of your version of this
017 file only under the terms of the GPL and not to allow others to use your version
018 of this file under the MPL, indicate your decision by deleting the provisions above
019 and replace them with the notice and other provisions required by the GPL License.
020 If you do not delete the provisions above, a recipient may use your version of
021 this file under either the MPL or the GPL.
022
023 */
024 package ca.uhn.hl7v2.parser;
025
026 import java.io.File;
027 import java.io.FileReader;
028 import java.util.ArrayList;
029 import java.util.HashSet;
030 import java.util.List;
031 import java.util.Set;
032
033 import javax.xml.parsers.DocumentBuilderFactory;
034
035 import org.slf4j.Logger;
036 import org.slf4j.LoggerFactory;
037 import org.w3c.dom.DOMException;
038 import org.w3c.dom.Document;
039 import org.w3c.dom.Element;
040 import org.w3c.dom.Node;
041 import org.w3c.dom.NodeList;
042
043 import ca.uhn.hl7v2.HL7Exception;
044 import ca.uhn.hl7v2.model.Group;
045 import ca.uhn.hl7v2.model.Message;
046 import ca.uhn.hl7v2.model.Segment;
047 import ca.uhn.hl7v2.model.Structure;
048
049 /**
050 * <p>A default XMLParser. This class assigns segment elements (in an XML-encoded message)
051 * to Segment objects (in a Message object) using the name of a segment and the names
052 * of any groups in which the segment is nested. The names of group classes must correspond
053 * to the names of group elements (they must be identical except that a dot in the element
054 * name, following the message name, is replaced with an underscore, in order to consitute a
055 * valid class name). </p>
056 * <p>At the time of writing, the group names in the XML spec are changing. Many of the group
057 * names have been automatically generated based on the group contents. However, these automatic
058 * names are gradually being replaced with manually assigned names. This process is expected to
059 * be complete by November 2002. As a result, mismatches are likely. Messages could be
060 * transformed prior to parsing (using XSLT) as a work-around. Alternatively the group class names
061 * could be changed to reflect updates in the XML spec. Ultimately, HAPI group classes will be
062 * changed to correspond with the official group names, once these are all assigned. </p>
063 * @author Bryan Tripp
064 */
065 public class DefaultXMLParser extends XMLParser {
066
067 private static final Logger log = LoggerFactory.getLogger(DefaultXMLParser.class);
068
069 private static final Set<String> ourForceGroupNames;
070
071 static {
072 ourForceGroupNames = new HashSet<String>();
073 ourForceGroupNames.add("DIET");
074 }
075
076 /** Creates a new instance of DefaultXMLParser */
077 public DefaultXMLParser() {
078 super();
079 }
080
081 /**
082 * Creates a new instance of DefaultXMLParser
083 *
084 * @param theFactory custom factory to use for model class lookup
085 */
086 public DefaultXMLParser(ModelClassFactory theFactory) {
087 super(theFactory);
088 }
089
090 /**
091 * <p>Creates an XML Document that corresponds to the given Message object. </p>
092 * <p>If you are implementing this method, you should create an XML Document, and insert XML Elements
093 * into it that correspond to the groups and segments that belong to the message type that your subclass
094 * of XMLParser supports. Then, for each segment in the message, call the method
095 * <code>encode(Segment segmentObject, Element segmentElement)</code> using the Element for
096 * that segment and the corresponding Segment object from the given Message.</p>
097 */
098 public Document encodeDocument(Message source) throws HL7Exception {
099 String messageClassName = source.getClass().getName();
100 String messageName = messageClassName.substring(messageClassName.lastIndexOf('.') + 1);
101 org.w3c.dom.Document doc = null;
102 try {
103 doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
104 Element root = doc.createElement(messageName);
105 doc.appendChild(root);
106 }
107 catch (Exception e) {
108 throw new HL7Exception(
109 "Can't create XML document - " + e.getClass().getName(),
110 HL7Exception.APPLICATION_INTERNAL_ERROR,
111 e);
112 }
113 encode(source, doc.getDocumentElement());
114 return doc;
115 }
116
117 /**
118 * Copies data from a group object into the corresponding group element, creating any
119 * necessary child nodes.
120 */
121 private void encode(ca.uhn.hl7v2.model.Group groupObject, org.w3c.dom.Element groupElement) throws HL7Exception {
122 String[] childNames = groupObject.getNames();
123 String messageName = groupObject.getMessage().getName();
124
125 try {
126 for (int i = 0; i < childNames.length; i++) {
127 Structure[] reps = groupObject.getAll(childNames[i]);
128 for (int j = 0; j < reps.length; j++) {
129 String elementName = makeGroupElementName(messageName, childNames[i]);
130 Element childElement;
131 try {
132 childElement = groupElement.getOwnerDocument().createElement(elementName);
133 } catch (DOMException e) {
134 throw new HL7Exception(
135 "Can't encode element " + elementName + " in group " + groupObject.getClass().getName(),
136 HL7Exception.APPLICATION_INTERNAL_ERROR,
137 e);
138 }
139 groupElement.appendChild(childElement);
140 if (reps[j] instanceof Group) {
141 encode((Group) reps[j], childElement);
142 }
143 else if (reps[j] instanceof Segment) {
144 encode((Segment) reps[j], childElement);
145 }
146 }
147 }
148 } catch (DOMException e) {
149 throw new HL7Exception(
150 "Can't encode group " + groupObject.getClass().getName(),
151 HL7Exception.APPLICATION_INTERNAL_ERROR,
152 e);
153 }
154 }
155
156 /**
157 * <p>Creates and populates a Message object from an XML Document that contains an XML-encoded HL7 message.</p>
158 * <p>The easiest way to implement this method for a particular message structure is as follows:
159 * <ol><li>Create an instance of the Message type you are going to handle with your subclass
160 * of XMLParser</li>
161 * <li>Go through the given Document and find the Elements that represent the top level of
162 * each message segment. </li>
163 * <li>For each of these segments, call <code>parse(Segment segmentObject, Element segmentElement)</code>,
164 * providing the appropriate Segment from your Message object, and the corresponding Element.</li></ol>
165 * At the end of this process, your Message object should be populated with data from the XML
166 * Document.</p>
167 * @throws HL7Exception if the message is not correctly formatted.
168 * @throws EncodingNotSupportedException if the message encoded
169 * is not supported by this parser.
170 */
171 public Message parseDocument(org.w3c.dom.Document XMLMessage, String version) throws HL7Exception {
172 String messageName = XMLMessage.getDocumentElement().getTagName();
173 Message message = instantiateMessage(messageName, version, true);
174 parse(message, XMLMessage.getDocumentElement());
175 return message;
176 }
177
178 /**
179 * Populates the given group object with data from the given group element, ignoring
180 * any unrecognized nodes.
181 */
182 private void parse(ca.uhn.hl7v2.model.Group groupObject, org.w3c.dom.Element groupElement) throws HL7Exception {
183 String[] childNames = groupObject.getNames();
184 String messageName = groupObject.getMessage().getName();
185
186 NodeList allChildNodes = groupElement.getChildNodes();
187 List<String> unparsedElementList = new ArrayList<String>();
188 for (int i = 0; i < allChildNodes.getLength(); i++) {
189 Node node = allChildNodes.item(i);
190 String name = node.getNodeName();
191 if (node.getNodeType() == Node.ELEMENT_NODE && !unparsedElementList.contains(name)) {
192 unparsedElementList.add(name);
193 }
194 }
195
196 //we're not too fussy about order here (all occurrences get parsed as repetitions) ...
197 for (int i = 0; i < childNames.length; i++) {
198 unparsedElementList.remove(childNames[i]);
199
200 // 4 char segment names are second occurrences of a segment within a single message
201 // structure. e.g. the second PID segment in an A17 patient swap message is known
202 // to hapi's code represenation as PID2
203 if (childNames[i].length() != 4) {
204 parseReps(groupElement, groupObject, messageName, childNames[i], childNames[i]);
205 } else {
206 log.debug("Skipping rep segment: {}", childNames[i]);
207 }
208 }
209
210 for (int i = 0; i < unparsedElementList.size(); i++) {
211 String segName = (String) unparsedElementList.get(i);
212 String segIndexName = groupObject.addNonstandardSegment(segName);
213 parseReps(groupElement, groupObject, messageName, segName, segIndexName);
214 }
215 }
216
217 //param childIndexName may have an integer on the end if >1 sibling with same name (e.g. NTE2)
218 private void parseReps(Element groupElement, Group groupObject,
219 String messageName, String childName, String childIndexName) throws HL7Exception {
220
221 List<Element> reps = getChildElementsByTagName(groupElement, makeGroupElementName(messageName, childName));
222 log.debug("# of elements matching {}: {}",
223 makeGroupElementName(messageName, childName), reps.size());
224
225 if (groupObject.isRepeating(childIndexName)) {
226 for (int i = 0; i < reps.size(); i++) {
227 parseRep(reps.get(i), groupObject.get(childIndexName, i));
228 }
229 } else {
230 if (reps.size() > 0) {
231 parseRep(reps.get(0), groupObject.get(childIndexName, 0));
232 }
233
234 // if (reps.size() > 1) {
235 // String newIndexName = groupObject.addNonstandardSegment(childName);
236 // for (int i = 1; i < reps.size(); i++) {
237 // parseRep((Element) reps.get(i), groupObject.get(newIndexName, i-1));
238 // }
239 // }
240 if (reps.size() > 1) {
241 String newIndexName = "";
242 int i=1;
243 try {
244 for (i = 1; i < reps.size(); i++) {
245 newIndexName = childName+(i+1);
246 Structure st = groupObject.get(newIndexName);
247 parseRep(reps.get(i), st);
248 }
249 } catch(Throwable t) {
250 log.info("Issue Parsing: " + t);
251 newIndexName = groupObject.addNonstandardSegment(childName);
252 for (int j = i; j < reps.size(); j++) {
253 parseRep(reps.get(j), groupObject.get(newIndexName, j-i));
254 }
255 }
256 }
257
258 }
259 }
260
261 private void parseRep(Element theElem, Structure theObj) throws HL7Exception {
262 if (theObj instanceof Group) {
263 parse((Group) theObj, theElem);
264 }
265 else if (theObj instanceof Segment) {
266 parse((Segment) theObj, theElem);
267 }
268 log.debug("Parsed element: {}", theElem.getNodeName());
269 }
270
271 //includes direct children only
272 private List<Element> getChildElementsByTagName(Element theElement, String theName) {
273 List<Element> result = new ArrayList<Element>(10);
274 NodeList children = theElement.getChildNodes();
275
276 for (int i = 0; i < children.getLength(); i++) {
277 Node child = children.item(i);
278 if (child.getNodeType() == Node.ELEMENT_NODE && child.getNodeName().equals(theName)) {
279 result.add((Element)child);
280 }
281 }
282
283 return result;
284 }
285
286 /**
287 * Given the name of a group element in an XML message, returns the corresponding
288 * group class name. This name is identical except in order to be a valid class
289 * name, the dot character immediately following the message name is replaced with
290 * an underscore. For example, there is a group element called ADT_A01.INSURANCE and the
291 * corresponding group Class is called ADT_A01_INSURANCE.
292 */
293 // protected static String makeGroupClassName(String elementName) {
294 // return elementName.replace('.', '_');
295 // }
296
297 /**
298 * Given the name of a message and a Group class, returns the corresponding group element name in an
299 * XML-encoded message. This is the message name and group name separated by a dot. For example,
300 * ADT_A01.INSURANCE.
301 *
302 * If it looks like a segment name (i.e. has 3 characters), no change is made.
303 */
304 protected static String makeGroupElementName(String messageName, String className) {
305 String ret = null;
306
307 if (className.length() > 4 || ourForceGroupNames.contains(className)) {
308 StringBuilder elementName = new StringBuilder();
309 elementName.append(messageName);
310 elementName.append('.');
311 elementName.append(className);
312 ret = elementName.toString();
313 } else if (className.length() == 4) {
314 // It is not clear why this case is needed.. We should figure out
315 // why it was added, since removing it or optimizing its use would
316 // prevent the need for "ourForGroupNames" above
317 ret = className.substring(0,3);
318 } else {
319 ret = className;
320 }
321
322 return ret;
323 }
324
325 /** Test harness */
326 public static void main(String args[]) {
327 if (args.length != 1) {
328 System.out.println("Usage: DefaultXMLParser pipe_encoded_file");
329 System.exit(1);
330 }
331
332 //read and parse message from file
333 try {
334 File messageFile = new File(args[0]);
335 long fileLength = messageFile.length();
336 FileReader r = new FileReader(messageFile);
337 char[] cbuf = new char[(int) fileLength];
338 System.out.println("Reading message file ... " + r.read(cbuf) + " of " + fileLength + " chars");
339 r.close();
340 String messString = String.valueOf(cbuf);
341
342 Parser inParser = null;
343 Parser outParser = null;
344 PipeParser pp = new PipeParser();
345 ca.uhn.hl7v2.parser.XMLParser xp = new DefaultXMLParser();
346 System.out.println("Encoding: " + pp.getEncoding(messString));
347 if (pp.getEncoding(messString) != null) {
348 inParser = pp;
349 outParser = xp;
350 }
351 else if (xp.getEncoding(messString) != null) {
352 inParser = xp;
353 outParser = pp;
354 }
355
356 Message mess = inParser.parse(messString);
357 System.out.println("Got message of type " + mess.getClass().getName());
358
359 String otherEncoding = outParser.encode(mess);
360 System.out.println(otherEncoding);
361 }
362 catch (Exception e) {
363 e.printStackTrace();
364 }
365 }
366
367 /**
368 * {@inheritDoc}
369 */
370 @Override
371 public void parse(Message theMessage, String theString) throws HL7Exception {
372 Document doc = parseStringIntoDocument(theString);
373 parse(theMessage, doc.getDocumentElement());
374 }
375
376
377 }