001    package ca.uhn.hl7v2.parser;
002    
003    import java.util.Collections;
004    import java.util.HashSet;
005    import java.util.Set;
006    
007    import ca.uhn.hl7v2.model.Varies;
008    import ca.uhn.hl7v2.util.Terser;
009    
/**
 * Holds the configuration options defined in this class: forced encoding of
 * empty segments/fields, encoding of empty mandatory segments, and fallback
 * datatypes for OBX segments whose OBX-2 value is missing or invalid.
 * <p>
 * Instances are mutable and perform no synchronization of their own.
 * </p>
 */
public class ParserConfiguration {

    /**
     * Pattern which the last element of a forced encode path must match: a
     * three character segment name, optionally followed by up to two numeric
     * parts (e.g. <code>ORC</code>, <code>ORC-4</code> or <code>ORC-4-2</code>).
     */
    private static final String FORCED_ENCODE_LAST_ELEMENT_REGEX = "[A-Z0-9]{3}(-[0-9]+){0,2}$";

    private static final String FORCED_ENCODE_INVALID_MESSAGE = "Definition must end with a segment name or field lookup, e.g. MSH or MSH-2";

    private String myDefaultObx2Type;
    private boolean myEncodeEmptyMandatorySegments = true;
    private final Set<String> myForcedEncode = new HashSet<String>();
    private String myInvalidObx2Type;

    /**
     * <p>
     * Forces the parser to encode certain segments/fields, even if they contain
     * no content. This method may be called multiple times with multiple path
     * definitions, and each path definition contains the path to the segment or
     * field which needs to be forced.
     * </p>
     * <p>
     * Path definitions are similar in format to {@link Terser Terser} paths.
     * They contain a slash-separated lookup path to reach a given segment, and
     * optionally a field number. The following are examples of paths which
     * could be added here, as well as the sample output for an otherwise empty
     * ORU^R01 message:
     * </p>
     * <table>
     * <thead>
     * <tr>
     * <th>Forced Encode Path</th>
     * <th>Encode Output</th>
     * </tr>
     * </thead>
     * <tr>
     * <td>None (for illustration purposes)</td>
     * <td>MSH|^~\&amp;|||||||ORU^R01^ORU_R01||T|2.4</td>
     * </tr>
     * <tr>
     * <td>PATIENT_RESULT/ORDER_OBSERVATION/ORC</td>
     * <td>MSH|^~\&amp;|||||||ORU^R01^ORU_R01||T|2.4<br>
     * ORC|</td>
     * </tr>
     * <tr>
     * <td>PATIENT_RESULT/ORDER_OBSERVATION/ORC-4</td>
     * <td>MSH|^~\&amp;|||||||ORU^R01^ORU_R01||T|2.4<br>
     * ORC||||</td>
     * </tr>
     * <tr>
     * <td>PATIENT_RESULT/ORDER_OBSERVATION/ORC-4-2</td>
     * <td>MSH|^~\&amp;|||||||ORU^R01^ORU_R01||T|2.4<br>
     * ORC||||^</td>
     * </tr>
     * </table>
     * <p>
     * While empty segments do not generally have any meaning according to HL7,
     * this may be useful when transmitting to systems which rely on segments
     * being received even if they have no content.
     * </p>
     * <p>
     * Note that this configuration item currently only applies to
     * {@link PipeParser}
     * </p>
     * 
     * @param theForcedEncode
     *            The path definition to add. Only the last path element (the
     *            text after the final slash) is validated.
     * @throws NullPointerException
     *             If the path definition is <code>null</code>
     * @throws IllegalArgumentException
     *             If the last path element is not a three character segment
     *             name optionally followed by up to two numeric parts
     * @since 1.3
     */
    public void addForcedEncode(String theForcedEncode) {
        if (theForcedEncode == null) {
            throw new NullPointerException("forced encode may not be null");
        }

        // A definition without a slash, or whose only slash is the very first
        // character, is validated as a whole (so a leading slash is rejected).
        // Otherwise only the text after the last slash is validated.
        int lastSlashIndex = theForcedEncode.lastIndexOf('/');
        String lastElement = lastSlashIndex > 0 ? theForcedEncode.substring(lastSlashIndex + 1) : theForcedEncode;

        if (!lastElement.matches(FORCED_ENCODE_LAST_ELEMENT_REGEX)) {
            throw new IllegalArgumentException(FORCED_ENCODE_INVALID_MESSAGE);
        }

        myForcedEncode.add(theForcedEncode);
    }

    /**
     * Removes a forced encode entry. Removing an entry which was never added
     * has no effect.
     * 
     * @param theForcedEncode
     *            The path definition to remove, exactly as it was passed to
     *            {@link #addForcedEncode(String)}
     * @throws NullPointerException
     *             If the path definition is <code>null</code>
     * @see #addForcedEncode(String)
     * @since 1.3
     */
    public void removeForcedEncode(String theForcedEncode) {
        if (theForcedEncode == null) {
            throw new NullPointerException("forced encode may not be null");
        }

        myForcedEncode.remove(theForcedEncode);
    }

    /**
     * Determines whether any forced encode path is equal to, or located
     * beneath, the given path.
     * <p>
     * The given path only matches on whole path elements: a forced encode path
     * matches if it is identical to the given path, or if it continues with a
     * <code>/</code> (a child structure) or a <code>-</code> (a field of the
     * segment). For example <code>PATIENT</code> does not match the forced
     * path <code>PATIENT_RESULT/ORC</code>. An empty path, or a path which
     * itself ends with a slash, matches every forced path it is a prefix of.
     * </p>
     * 
     * @param theTerserPath
     *            The slash-separated path of a group or segment, in the same
     *            format as the paths passed to {@link #addForcedEncode(String)}
     * @return <code>true</code> if at least one forced encode path is at or
     *         beneath the given path
     */
    boolean determineForcedEncodeIncludesTerserPath(String theTerserPath) {
        int pathLength = theTerserPath.length();

        // Nothing can be cut in half if the prefix is empty or already ends on
        // a separator
        boolean prefixEndsOnBoundary = pathLength == 0 || theTerserPath.charAt(pathLength - 1) == '/';

        for (String next : getForcedEncode()) {
            if (!next.startsWith(theTerserPath)) {
                continue;
            }
            if (prefixEndsOnBoundary || next.length() == pathLength) {
                return true;
            }

            // Make sure the prefix did not stop in the middle of a name
            char following = next.charAt(pathLength);
            if (following == '/' || following == '-') {
                return true;
            }
        }
        return false;
    }

    /**
     * Determines the highest field number which has been forced for the
     * segment at the given path.
     * <p>
     * Only forced encode paths consisting of exactly the given path followed by
     * a field definition (e.g. <code>-4</code> or <code>-4-2</code>) are
     * considered. Forced paths which merely share a textual prefix with the
     * given path, or which point to a structure beneath it, are ignored.
     * </p>
     * 
     * @param theCurrentTerserPath
     *            The slash-separated path of the segment being examined
     * @return The highest forced field number for the segment, or
     *         <code>0</code> if no field has been forced
     */
    int determineForcedFieldNumForTerserPath(String theCurrentTerserPath) {
        int segmentPathLength = theCurrentTerserPath.length();
        int forceUpToFieldNum = 0;

        for (String nextPath : getForcedEncode()) {
            if (!nextPath.startsWith(theCurrentTerserPath) || nextPath.length() <= segmentPathLength) {
                continue;
            }

            // The segment path must be followed directly by a field
            // definition. Anything else (a longer name, or a child structure)
            // does not describe a field of this segment and can not be parsed
            // as a number.
            if (nextPath.charAt(segmentPathLength) != '-' || nextPath.indexOf('/', segmentPathLength) != -1) {
                continue;
            }

            // The field number ends at the next dash (the start of the second
            // numeric part) or at the end of the path
            int startOfFieldDef = segmentPathLength + 1;
            int endOfFieldDef = nextPath.indexOf('-', startOfFieldDef);
            if (endOfFieldDef == -1) {
                endOfFieldDef = nextPath.length();
            }

            int fieldNum = Integer.parseInt(nextPath.substring(startOfFieldDef, endOfFieldDef));
            forceUpToFieldNum = Math.max(forceUpToFieldNum, fieldNum);
        }

        return forceUpToFieldNum;
    }

    /**
     * Returns the default datatype ("ST", "NM", etc) for an OBX segment with a
     * missing OBX-2 value
     * 
     * @return Returns the default datatype ("ST", "NM", etc) for an OBX segment
     *         with a missing OBX-2 value, or <code>null</code> if none has been
     *         set
     * @see #setDefaultObx2Type(String)
     */
    public String getDefaultObx2Type() {
        return myDefaultObx2Type;
    }

    /**
     * @return Returns an unmodifiable view of the forced encode strings added
     *         by {@link #addForcedEncode(String)}
     * 
     * @see #addForcedEncode(String)
     * @since 1.3
     */
    public Set<String> getForcedEncode() {
        return Collections.unmodifiableSet(myForcedEncode);
    }

    /**
     * @return Returns <code>true</code> (the default) if empty segments should
     *         still be encoded if they are mandatory within their message
     *         structure.
     * @see #setEncodeEmptyMandatoryFirstSegments(boolean)
     */
    public boolean isEncodeEmptyMandatorySegments() {
        return myEncodeEmptyMandatorySegments;
    }

    /**
     * Returns the datatype ("ST", "NM", etc) to use for an OBX segment with an
     * invalid OBX-2 value.
     * 
     * @return Returns the datatype ("ST", "NM", etc) to use for an OBX segment
     *         with an invalid OBX-2 value, or <code>null</code> if none has
     *         been set
     * @see #setInvalidObx2Type(String)
     */
    public String getInvalidObx2Type() {
        return myInvalidObx2Type;
    }

    /**
     * <p>
     * If this property is set, the value provides a default datatype ("ST",
     * "NM", etc) for an OBX segment with a missing OBX-2 value. This is useful
     * when parsing messages from systems which do not correctly populate OBX-2.
     * </p>
     * <p>
     * For example, if this property is set to "ST", and the following OBX
     * segment is encountered:
     * </p>
     * 
     * <pre>
     * OBX|||||This is a value
     * </pre>
     * 
     * <p>
     * It will be parsed as though it had read:
     * </p>
     * 
     * <pre>
     * OBX||ST|||This is a value
     * </pre>
     * 
     * <p>
     * Note that this configuration can also be set globally using the system
     * property {@link Varies#DEFAULT_OBX2_TYPE_PROP}, but any value provided to
     * {@link ParserConfiguration} takes priority over the system property.
     * </p>
     * 
     * @param theDefaultObx2Type
     *            If this property is set, the value provides a default datatype
     *            ("ST", "NM", etc) for an OBX segment with a missing OBX-2
     *            value
     * @see #setInvalidObx2Type(String)
     * @see Varies#DEFAULT_OBX2_TYPE_PROP
     * @see Varies#INVALID_OBX2_TYPE_PROP
     */
    public void setDefaultObx2Type(String theDefaultObx2Type) {
        myDefaultObx2Type = theDefaultObx2Type;
    }

    /**
     * <p>
     * If set to <code>true</code> (default is <code>true</code>), when encoding
     * a group using the PipeParser where the first segment is required, but no
     * data has been populated in that segment, the empty segment is now still
     * encoded if needed as a blank segment in order to give parsers a hint
     * about which group subsequent segments are in. This helps to ensure that
     * messages can be "round tripped", meaning that a message which is parsed,
     * encoded, and then re-parsed should contain exactly the same structure
     * from beginning to end.
     * </p>
     * <p>
     * For example, in an ORU^R01 message with a populated OBX segment, but no
     * data in the mandatory OBR segment which begins the ORDER_OBSERVATION
     * group the message would still contain an empty OBR segment when encoded:
     * </p>
     * 
     * <pre>
     *      MSH|^~\&amp;|REG|W|||201103230042||ORU^R01|32153168|P|2.5
     *      OBR|
     *      OBX||ST|||Value Data
     * </pre>
     * 
     * <p>
     * Previously, the following encoding would have occurred, which would have
     * incorrectly been parsed as having a custom OBX segment instead of having
     * a normal ORDER_OBSERVATION group:
     * </p>
     * 
     * <pre>
     *      MSH|^~\&amp;|REG|W|||201103230042||ORU^R01|32153168|P|2.5
     *      OBX||ST|||Value Data
     * </pre>
     * 
     * @param theEncodeEmptyMandatorySegments
     *            If set to <code>true</code> (default is <code>true</code>),
     *            when encoding a group using the PipeParser where the first
     *            segment is required, but no data has been populated in that
     *            segment, the empty segment is now still encoded if needed as a
     *            blank segment in order to give parsers a hint about which
     *            group subsequent segments are in
     * @see #isEncodeEmptyMandatorySegments()
     */
    public void setEncodeEmptyMandatoryFirstSegments(boolean theEncodeEmptyMandatorySegments) {
        myEncodeEmptyMandatorySegments = theEncodeEmptyMandatorySegments;
    }

    /**
     * <p>
     * If this property is set, the value provides a default datatype ("ST",
     * "NM", etc) for an OBX segment with an invalid OBX-2 value. This is useful
     * when parsing messages from systems which do not correctly populate OBX-2.
     * </p>
     * <p>
     * For example, if this property is set to "ST", and the following OBX
     * segment is encountered:
     * </p>
     * 
     * <pre>
     * OBX||INVALID|||This is a value
     * </pre>
     * 
     * <p>
     * It will be parsed as though it had read:
     * </p>
     * 
     * <pre>
     * OBX||ST|||This is a value
     * </pre>
     * 
     * <p>
     * Note that this configuration can also be set globally using the system
     * property {@link Varies#INVALID_OBX2_TYPE_PROP}, but any value provided to
     * {@link ParserConfiguration} takes priority over the system property.
     * </p>
     * 
     * @param theInvalidObx2Type
     *            If this property is set, the value provides a default datatype
     *            ("ST", "NM", etc) for an OBX segment with an invalid OBX-2
     *            value. This is useful when parsing messages from systems which
     *            do not correctly populate OBX-2.
     * @see ParserConfiguration#setDefaultObx2Type(String)
     * @see Varies#INVALID_OBX2_TYPE_PROP
     * @see Varies#DEFAULT_OBX2_TYPE_PROP
     */
    public void setInvalidObx2Type(String theInvalidObx2Type) {
        myInvalidObx2Type = theInvalidObx2Type;
    }

}