001package ca.uhn.hl7v2.parser;
002
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Pattern;

import ca.uhn.hl7v2.model.Varies;
import ca.uhn.hl7v2.util.Terser;
009
/**
 * Holds the settings which customize how a parser parses and encodes
 * messages: the fallback datatypes for OBX segments with a missing or
 * invalid OBX-2 value, whether empty mandatory first segments are encoded,
 * and the set of segment/field paths whose encoding is forced.
 */
public class ParserConfiguration {

        /**
         * Shape required of the last element of a forced encode path: a three
         * character segment name made of upper case letters and digits,
         * followed by at most two numeric <code>-n</code> suffixes (e.g.
         * <code>MSH</code>, <code>MSH-2</code>, <code>MSH-2-1</code>). Compiled
         * once instead of on every call to {@link #addForcedEncode(String)}.
         */
        private static final Pattern FORCED_ENCODE_LAST_ELEMENT = Pattern.compile("[A-Z0-9]{3}(-[0-9]+){0,2}$");

        private String myDefaultObx2Type;
        private boolean myEncodeEmptyMandatorySegments = true;
        private final Set<String> myForcedEncode = new HashSet<String>();
        private String myInvalidObx2Type;

        /**
         * <p>
         * Forces the parser to encode certain segments/fields, even if they contain
         * no content. This method may be called multiple times with multiple path
         * definitions, and each path definition contains the path to the segment or
         * field which needs to be forced.
         * </p>
         * <p>
         * Path definitions are similar in format to {@link Terser Terser} paths.
         * They contain a slash-separated lookup path to reach a given segment, and
         * optionally a field number. The following are examples of paths which
         * could be added here, as well as the sample output for an otherwise empty
         * ORU^R01 message:
         * </p>
         * <table>
         * <thead>
         * <tr>
         * <th>Forced Encode Path</th>
         * <th>Encode Output</th>
         * </tr>
         * </thead>
         * <tr>
         * <td>None (for illustration purposes)</td>
         * <td>MSH|^~\&amp;|||||||ORU^R01^ORU_R01||T|2.4</td>
         * </tr>
         * <tr>
         * <td>PATIENT_RESULT/ORDER_OBSERVATION/ORC</td>
         * <td>MSH|^~\&amp;|||||||ORU^R01^ORU_R01||T|2.4<br>
         * ORC|</td>
         * </tr>
         * <tr>
         * <td>PATIENT_RESULT/ORDER_OBSERVATION/ORC-4</td>
         * <td>MSH|^~\&amp;|||||||ORU^R01^ORU_R01||T|2.4<br>
         * ORC||||</td>
         * </tr>
         * <tr>
         * <td>PATIENT_RESULT/ORDER_OBSERVATION/ORC-4-2</td>
         * <td>MSH|^~\&amp;|||||||ORU^R01^ORU_R01||T|2.4<br>
         * ORC||||^</td>
         * </tr>
         * </table>
         * <p>
         * While empty segments do not generally have any meaning according to HL7,
         * this may be useful when transmitting to systems which rely on segments
         * being received even if they have no content.
         * </p>
         * <p>
         * Note that this configuration item currently only applies to
         * {@link PipeParser}
         * </p>
         * 
         * @param theForcedEncode
         *            The path definition to force. Only the element after the
         *            last slash is validated; it must be a three character
         *            segment name optionally followed by up to two numeric
         *            <code>-n</code> suffixes. A path starting with a slash is
         *            rejected.
         * @throws NullPointerException
         *             If <code>theForcedEncode</code> is <code>null</code>
         * @throws IllegalArgumentException
         *             If the path does not end with a segment name or field
         *             lookup
         * @since 1.3
         */
        public void addForcedEncode(String theForcedEncode) {
                if (theForcedEncode == null) {
                        throw new NullPointerException("forced encode may not be null");
                }

                // Only a slash past index 0 separates a parent path from the last
                // element. With no slash, or a leading slash only, the whole
                // definition is validated, which is what rejects "/ORC".
                int lastSlashIndex = theForcedEncode.lastIndexOf('/');
                String lastElement = lastSlashIndex > 0 ? theForcedEncode.substring(lastSlashIndex + 1) : theForcedEncode;

                if (!FORCED_ENCODE_LAST_ELEMENT.matcher(lastElement).matches()) {
                        throw new IllegalArgumentException("Definition must end with a segment name or field lookup, e.g. MSH or MSH-2");
                }
                myForcedEncode.add(theForcedEncode);
        }

        /**
         * Removes a forced encode entry. Removing a path which was never added
         * has no effect.
         * 
         * @param theForcedEncode
         *            The path definition to remove, exactly as it was passed to
         *            {@link #addForcedEncode(String)}
         * @throws NullPointerException
         *             If <code>theForcedEncode</code> is <code>null</code>
         * @see #addForcedEncode(String)
         * @since 1.3
         */
        public void removeForcedEncode(String theForcedEncode) {
                if (theForcedEncode == null) {
                        throw new NullPointerException("forced encode may not be null");
                }
                
                myForcedEncode.remove(theForcedEncode);
        }
        
        /**
         * Determines whether at least one forced encode path refers to the
         * structure at the given path, or to something beneath it.
         * 
         * @param theTerserPath
         *            The slash-separated path of the structure being examined
         * @return <code>true</code> if a forced encode path is equal to the given
         *         path, or continues past it with a <code>/</code> or
         *         <code>-</code>
         */
        boolean determineForcedEncodeIncludesTerserPath(String theTerserPath) {
                for (String next : getForcedEncode()) {
                        if (isAtOrBelow(next, theTerserPath)) {
                                return true;
                        }
                }
                return false;
        }

        /**
         * Checks that <code>theTerserPath</code> is a prefix of
         * <code>theForcedPath</code> which ends on an element boundary, so that
         * e.g. <code>PATIENT</code> is not treated as a parent of
         * <code>PATIENT_RESULT/ORC</code>.
         */
        private static boolean isAtOrBelow(String theForcedPath, String theTerserPath) {
                if (!theForcedPath.startsWith(theTerserPath)) {
                        return false;
                }
                int prefixLength = theTerserPath.length();
                // An empty path is a prefix of everything; an equal path is an exact hit
                if (prefixLength == 0 || prefixLength == theForcedPath.length()) {
                        return true;
                }
                char boundary = theForcedPath.charAt(prefixLength);
                return boundary == '/' || boundary == '-';
        }

        /**
         * Determines the highest field number which a forced encode path requests
         * for the segment at the given path.
         * 
         * @param theCurrentTerserPath
         *            The slash-separated path of the segment being examined
         * @return The largest number directly following
         *         <code>theCurrentTerserPath</code> and a <code>-</code> in any
         *         forced encode path, or <code>0</code> if no forced encode path
         *         has that form
         */
        int determineForcedFieldNumForTerserPath(String theCurrentTerserPath) {
                int forceUpToFieldNum = 0;
                for (String nextPath : getForcedEncode()) {
                        if (!nextPath.startsWith(theCurrentTerserPath)) {
                                continue;
                        }

                        // Only "<path>-<field>..." is a field lookup on this segment. A
                        // forced path which merely shares the prefix (a longer name, or a
                        // child structure after a '/') carries no field number here, and
                        // parsing its remainder as a number would fail.
                        int segmentPathLength = theCurrentTerserPath.length();
                        boolean fieldLookupOnThisSegment = nextPath.length() > segmentPathLength
                                        && nextPath.charAt(segmentPathLength) == '-'
                                        && nextPath.indexOf('/', segmentPathLength) == -1;
                        if (!fieldLookupOnThisSegment) {
                                continue;
                        }

                        int fieldStart = segmentPathLength + 1;
                        int fieldEnd = nextPath.indexOf('-', fieldStart);
                        if (fieldEnd == -1) {
                                fieldEnd = nextPath.length();
                        }
                        int fieldNum = Integer.parseInt(nextPath.substring(fieldStart, fieldEnd));
                        forceUpToFieldNum = Math.max(forceUpToFieldNum, fieldNum);
                }
                return forceUpToFieldNum;
        }

        /**
         * Returns the default datatype ("ST", "NM", etc) for an OBX segment with a
         * missing OBX-2 value
         * 
         * @return Returns the default datatype ("ST", "NM", etc) for an OBX segment
         *         with a missing OBX-2 value
         * @see #setDefaultObx2Type(String)
         */
        public String getDefaultObx2Type() {
                return myDefaultObx2Type;
        }

        /**
         * @return Returns the forced encode strings added by
         *         {@link #addForcedEncode(String)}, as an unmodifiable view of
         *         the set held by this configuration
         * 
         * @see #addForcedEncode(String)
         * @since 1.3
         */
        public Set<String> getForcedEncode() {
                return Collections.unmodifiableSet(myForcedEncode);
        }

        /**
         * @return Returns <code>true</code> if empty segments should still be
         *         encoded if they are mandatory within their message structure.
         * @see #setEncodeEmptyMandatoryFirstSegments(boolean)
         */
        public boolean isEncodeEmptyMandatorySegments() {
                return myEncodeEmptyMandatorySegments;
        }

        /**
         * Returns the default datatype ("ST", "NM", etc) for an OBX segment with
         * an invalid OBX-2 value.
         * 
         * @return Returns the default datatype ("ST", "NM", etc) for an OBX
         *         segment with an invalid OBX-2 value.
         * @see #setInvalidObx2Type(String)
         */
        public String getInvalidObx2Type() {
                return myInvalidObx2Type;
        }

        /**
         * <p>
         * If this property is set, the value provides a default datatype ("ST",
         * "NM", etc) for an OBX segment with a missing OBX-2 value. This is useful
         * when parsing messages from systems which do not correctly populate OBX-2.
         * </p>
         * <p>
         * For example, if this property is set to "ST", and the following OBX
         * segment is encountered:
         * 
         * <pre>
         * OBX|||||This is a value
         * </pre>
         * 
         * It will be parsed as though it had read:
         * 
         * <pre>
         * OBX||ST|||This is a value
         * </pre>
         * 
         * </p>
         * <p>
         * Note that this configuration can also be set globally using the system
         * property {@link Varies#DEFAULT_OBX2_TYPE_PROP}, but any value provided to
         * {@link ParserConfiguration} takes priority over the system property.
         * </p>
         * 
         * @param theDefaultObx2Type
         *            If this property is set, the value provides a default datatype
         *            ("ST", "NM", etc) for an OBX segment with a missing OBX-2
         *            value
         * @see #setInvalidObx2Type(String)
         * @see Varies#INVALID_OBX2_TYPE_PROP
         */
        public void setDefaultObx2Type(String theDefaultObx2Type) {
                myDefaultObx2Type = theDefaultObx2Type;
        }

        /**
         * <p>
         * If set to <code>true</code> (default is <code>true</code>), when encoding
         * a group using the PipeParser where the first segment is required, but no
         * data has been populated in that segment, the empty segment is now still
         * encoded if needed as a blank segment in order to give parsers a hint
         * about which group subsequent segments are in. This helps to ensure that
         * messages can be "round tripped", meaning that a message which is parsed,
         * encoded, and then re-parsed should contain exactly the same structure
         * from beginning to end.
         * </p>
         * <p>
         * For example, in an ORU^R01 message with a populated OBX segment, but no
         * data in the mandatory OBR segment which begins the ORDER_OBSERVATION
         * group the message would still contain an empty OBR segment when encoded:
         * </p>
         * 
         * <pre>
         *      MSH|^~\&amp;|REG|W|||201103230042||ORU^R01|32153168|P|2.5
         *      OBR|
         *      OBX||ST|||Value Data
         * </pre>
         * 
         * <p>
         * Previously, the following encoding would have occurred, which would have
         * incorrectly been parsed as having a custom OBX segment instead of having
         * a normal ORDER_OBSERVATION group:
         * </p>
         * 
         * <pre>
         *      MSH|^~\&amp;|REG|W|||201103230042||ORU^R01|32153168|P|2.5
         *      OBX||ST|||Value Data
         * </pre>
         * 
         * @param theEncodeEmptyMandatorySegments
         *            If set to <code>true</code> (default is <code>true</code>),
         *            when encoding a group using the PipeParser where the first
         *            segment is required, but no data has been populated in that
         *            segment, the empty segment is now still encoded if needed as a
         *            blank segment in order to give parsers a hint about which
         *            group subsequent segments are in
         * @see #isEncodeEmptyMandatorySegments()
         */
        public void setEncodeEmptyMandatoryFirstSegments(boolean theEncodeEmptyMandatorySegments) {
                myEncodeEmptyMandatorySegments = theEncodeEmptyMandatorySegments;
        }

        /**
         * <p>
         * If this property is set, the value provides a default datatype ("ST",
         * "NM", etc) for an OBX segment with an invalid OBX-2 value. This is useful
         * when parsing messages from systems which do not correctly populate OBX-2.
         * </p>
         * <p>
         * For example, if this property is set to "ST", and the following OBX
         * segment is encountered:
         * 
         * <pre>
         * OBX||INVALID|||This is a value
         * </pre>
         * 
         * It will be parsed as though it had read:
         * 
         * <pre>
         * OBX||ST|||This is a value
         * </pre>
         * 
         * </p>
         * <p>
         * Note that this configuration can also be set globally using the system
         * property {@link Varies#INVALID_OBX2_TYPE_PROP}, but any value provided to
         * {@link ParserConfiguration} takes priority over the system property.
         * </p>
         * 
         * @param theInvalidObx2Type
         *            If this property is set, the value provides a default datatype
         *            ("ST", "NM", etc) for an OBX segment with an invalid OBX-2
         *            value. This is useful when parsing messages from systems which
         *            do not correctly populate OBX-2.
         * @see ParserConfiguration#setDefaultObx2Type(String)
         * @see Varies#DEFAULT_OBX2_TYPE_PROP
         */
        public void setInvalidObx2Type(String theInvalidObx2Type) {
                myInvalidObx2Type = theInvalidObx2Type;
        }

}