001 /*
002 * Created on 28-Apr-2004
003 */
004 package ca.uhn.hl7v2.preparser;
005
006 import java.util.Arrays;
007 import java.util.List;
008 import java.util.Properties;
009 import java.util.StringTokenizer;
010
011 import ca.uhn.hl7v2.HL7Exception;
012 import ca.uhn.hl7v2.parser.EncodingDetector;
013 import ca.uhn.hl7v2.util.Terser;
014
015 /**
016 * <p>Extracts specified fields from unparsed messages. This class is a
017 * facade for the ER7 and XML classes. Use it like this: </p>
018 *
019 * <code>
020 * String message = null; //... your ER7 or XML message string goes here
021 * String[] fieldSpecs = {"MSH-9-1", "MSH-9-2", "MSH-12"};
022 * String[] fields = PreParser.getFields(message, fieldSpecs);
023 * </code>
024 *
025 * @author <a href="mailto:bryan.tripp@uhn.on.ca">Bryan Tripp</a>
026 * @version $Revision: 1.1 $ updated on $Date: 2007-02-19 02:24:37 $ by $Author: jamesagnew $
027 */
028 public class PreParser {
029
030 /**
031 * Extracts selected fields from a message.
032 *
033 * @param theMessageText an unparsed message from which to get fields
034 * @param thePathSpecs Terser-like paths to fields in the message. See documentation
035 * for Terser. These paths are identical except that they start with the segment
036 * name (search flags and group names are to be omitted as they are not relevant
037 * with unparsed ER7 messages).
038 * @return field values corresponding to the given paths
039 * @throws HL7Exception
040 */
041 public static String[] getFields(String theMessageText, String... thePathSpecs) throws HL7Exception {
042 DatumPath[] paths = new DatumPath[thePathSpecs.length];
043 for (int i = 0; i < thePathSpecs.length; i++) {
044 StringTokenizer tok = new StringTokenizer(thePathSpecs[i], "-", false);
045 String segSpec = tok.nextToken();
046 tok = new StringTokenizer(segSpec, "()", false);
047 String segName = tok.nextToken();
048 if (segName.length() != 3) {
049 throw new HL7Exception("In field path, " + segName + " is not a valid segment name");
050 }
051 int segRep = 0;
052 if (tok.hasMoreTokens()) {
053 String rep = tok.nextToken();
054 try {
055 segRep = Integer.parseInt(rep);
056 } catch (NumberFormatException e) {
057 throw new HL7Exception("In field path, segment rep" + rep + " is not valid", e);
058 }
059 }
060
061 int[] indices = Terser.getIndices(thePathSpecs[i]);
062 paths[i] = new DatumPath();
063 paths[i].add(segName).add(segRep);
064 paths[i].add(indices[0]).add(indices[1]).add(indices[2]).add(indices[3]);
065
066 }
067 return getFields(theMessageText, paths);
068 }
069
070 /**
071 * Gets selected fields from a message, as with String[] arg version but
072 * using DatumPaths.
073 */
074 private static String[] getFields(String theMessageText, DatumPath[] thePaths) throws HL7Exception {
075 String[] fields = new String[thePaths.length];
076 Properties props = new Properties();
077
078 List<DatumPath> mask = Arrays.asList(thePaths);
079
080 boolean OK = false;
081 if (EncodingDetector.isEr7Encoded(theMessageText)) {
082 OK = ER7.parseMessage(props, mask, theMessageText);
083 } else if (EncodingDetector.isXmlEncoded(theMessageText)) {
084 OK = XML.parseMessage(props, theMessageText, null);
085 } else {
086 throw new HL7Exception("Message encoding is not recognized");
087 }
088
089 if (!OK) {
090 throw new HL7Exception("Parse failed");
091 }
092
093 for (int i = 0; i < fields.length; i++) {
094 fields[i] = props.getProperty(thePaths[i].toString());
095 }
096 return fields;
097 }
098
099 }