001package ca.uhn.hl7v2.util;
002
003import ca.uhn.hl7v2.parser.*;
004import ca.uhn.hl7v2.model.Message;
005import ca.uhn.hl7v2.HL7Exception;
006import java.util.regex.*;
007import org.w3c.dom.*;
008import org.xml.sax.SAXException;
009import org.xml.sax.InputSource;
010import org.apache.xml.serialize.*;
011import org.apache.xerces.parsers.DOMParser;
012import org.apache.xerces.parsers.StandardParserConfiguration;
013import java.io.*;
014
015/**
016 * Tools for testing message strings for semantic equivalence without assuming the correctness
017 * of parsers.  
018 * @author Bryan Tripp
019 */
020public class EncodedMessageComparator {
021    
    /** Shared parser used by this class to detect message encodings and to parse and re-encode messages. */
    private static GenericParser parser = new GenericParser();  
023    
024    /**
025     * Returns a "standardized" equivalent of the given message string.  For delimited
026     * messages, the returned value is the shortest string that has an equivalent
027     * meaning in HL7.  For XML-encoded messages, the returned value is equivalent XML output
028     * using a standard pretty-print format.  An automatic determination is made about whether 
029     * the given string is XML or ER7 (i.e. traditionally) encoded.
030     * @param message an XML-encoded or ER7-encoded message string
031     */
032    public static String standardize(String message) throws SAXException {
033        String result = null;
034        String encoding = parser.getEncoding(message);
035        if (encoding.equals("XML")) {
036            result = standardizeXML(message);
037        } else {
038            result = standardizeER7(message);
039        }
040        return result;
041    }
042    
043    /**
044     * Returns the shortest string that is semantically equivalent to a given ER7-encoded 
045     * message string.
046     */
047    public static String standardizeER7(String message) {
048        
049        //make delimiter sequences (must quote with \ if not alphanumeric; can't otherwise because of regexp rules)
050        char fieldDelimChar = message.charAt(3);
051        String fieldDelim = String.valueOf(fieldDelimChar);
052        if (!Character.isLetterOrDigit(fieldDelimChar)) fieldDelim = "\\" + fieldDelimChar;
053        
054        char compSepChar = message.charAt(4);
055        String compSep = String.valueOf(compSepChar);
056        if (!Character.isLetterOrDigit(compSepChar)) compSep = "\\" + compSepChar;
057        
058        char repSepChar = message.charAt(5);
059        String repSep = String.valueOf(repSepChar);
060        if (!Character.isLetterOrDigit(repSepChar)) repSep = "\\" + repSepChar;
061        
062        char subSepChar = message.charAt(7);
063        String subSep = String.valueOf(subSepChar);
064        if (!Character.isLetterOrDigit(subSepChar)) subSep = "\\" + subSepChar;
065        
066        //char space = ' ';
067        
068        /* Things to strip (cumulative):
069         *  - all delimiters and repetition separators before end line (i.e. end segment)
070         *  - repetition separators, comp and subcomp delims before new field
071         *  - subcomponent delimiters before new component
072         */
073        Pattern endSegment = Pattern.compile("[" + fieldDelim + compSep + repSep + subSep + "]*[\n\r]+");
074        message = endSegment.matcher(message).replaceAll("\r");
075        
076        Pattern endField = Pattern.compile("[" + repSep + compSep + subSep + "]*" + fieldDelim);
077        message = endField.matcher(message).replaceAll(String.valueOf(fieldDelim));
078        
079        Pattern endComp = Pattern.compile("[" + subSep + "]*" + compSep);
080        message = endComp.matcher(message).replaceAll(String.valueOf(compSep));
081        
082        //Pattern endSub = Pattern.compile("[ ]*" + subSep);
083        //message = endSub.matcher(message).replaceAll(String.valueOf(subSep));
084        
085        //handle special case of subcomp delim in encoding characters
086        message = message.substring(0, 7) + subSepChar + message.substring(7);
087        
088        return message;
089    }
090    
091    /**
092     * Returns a semantic equivalent of a given XML-encoded message in a default format.
093     * Attributes, comments, and processing instructions are not considered to change the 
094     * HL7 meaning of the message, and are removed in the standardized representation.    
095     */
096    public static String standardizeXML(String message) throws SAXException {
097        DOMParser parser = new DOMParser(new StandardParserConfiguration());
098        parser.setFeature("http://apache.org/xml/features/dom/include-ignorable-whitespace", false);
099        
100        Document doc = null;
101        StringWriter out = new StringWriter();
102        try {
103            synchronized (parser) {
104                parser.parse(new InputSource(new StringReader(message)));
105                doc = parser.getDocument();
106            }
107            clean(doc.getDocumentElement());
108            OutputFormat outputFormat = new OutputFormat("", null, true);
109            XMLSerializer ser = new XMLSerializer(out, outputFormat);            
110            ser.serialize(doc);
111        } catch (IOException e) {
112            throw new RuntimeException("IOException doing IO to a string!!! " + e.getMessage());
113        }
114        return out.toString();
115    }
116    
117    /** Removes attributes, comments, and processing instructions. */
118    private static void clean(Element elem) {
119        NodeList children = elem.getChildNodes();        
120        for (int i = 0; i < children.getLength(); i++) {
121            Node child = children.item(i);
122            if (child.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE 
123                || child.getNodeType() == Node.COMMENT_NODE)
124            {
125                elem.removeChild(child);
126            } else if (child.getNodeType() == Node.ELEMENT_NODE) {
127                clean((Element) child);
128            }
129        }
130        
131        NamedNodeMap attributes = elem.getAttributes();
132        //get names
133        String[] names = new String[attributes.getLength()];
134        for (int i = 0; i < names.length; i++) {
135            names[i] = attributes.item(i).getNodeName();
136        }
137        //remove by name
138        for (int i = 0; i < names.length; i++) {
139            attributes.removeNamedItem(names[i]);
140        }
141    }
142    
143    /**
144     * <p>Compares two HL7 messages to see if they are equivalent (in terms of their  
145     * HL7 meaning).  Semantically irrelevant differences (e.g. spaces in an XML tag; 
146     * extra field delimiters at the end of a segment; XML vs. ER7 encoding; XML attributes)
147     * are ignored. This check is performed without assuming the correctness of the HAPI parsers, 
148     * and can therefore be used to test them.  This is done by parsing a message, encoding it
149     * again, and comparing the result with this original.  </p>
150     * <p>If one message is in XML and the other in ER7, the former is converted to ER7 to 
151     * perform the comparison.  This process relies on the HAPI parsers.  However, the 
152     * parsed message is first encoded as XML and compared to the original, so that the 
153     * integrity of the parser can be verified.  An exception is thrown if this comparison 
154     * is unsuccessful.  </p>
155     * @return true if given messages are semantically equivalent 
156     */
157    public static boolean equivalent(String message1, String message2) throws HL7Exception {
158        String encoding1 = parser.getEncoding(message1);
159        String encoding2 = parser.getEncoding(message2);
160        
161        if (!encoding1.equals(encoding2)) {
162            if (encoding1.equals("XML")) {
163                message1 = safeER7Conversion(message1);
164            } else {
165                message2 = safeER7Conversion(message2);
166            }
167        }
168        
169        String std1, std2;
170        try {
171            std1 = standardize(message1);
172            std2 = standardize(message2);
173        } catch (SAXException e) {
174            throw new HL7Exception("Equivalence check failed due to SAXException: " + e.getMessage());
175        }
176        
177        return std1.equals(std2);
178    }
179    
180    /** 
181     * Converts XML message to ER7, first checking integrity of parse and throwing 
182     * an exception if parse not correct
183     */
184    private static String safeER7Conversion(String xmlMessage) throws HL7Exception {
185        Message m = parser.parse(xmlMessage);
186
187        String check = parser.encode(m, "XML");
188        if (!equivalent(xmlMessage, check)) {
189            throw new HL7Exception("Parsed and encoded message not equivalent to original (possibilities: invalid message, bug in parser)");
190        }
191        
192        return parser.encode(m, "VB");        
193    }
194    
    /** 
     * Command-line entry point, intended to compare the messages in two files.
     * NOTE: not implemented -- the body is empty, so the arguments are ignored and
     * nothing is compared.
     * @param args command-line arguments (currently unused)
     */
    public static void main(String args[]) {
        
    }
201    
202}