001 /**
002 * The contents of this file are subject to the Mozilla Public License Version 1.1
003 * (the "License"); you may not use this file except in compliance with the License.
004 * You may obtain a copy of the License at http://www.mozilla.org/MPL/
005 * Software distributed under the License is distributed on an "AS IS" basis,
006 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the
007 * specific language governing rights and limitations under the License.
008 *
009 * The Original Code is "MessageQuery.java". Description:
010 * "Queries messages in an SQL-like style. "
011 *
012 * The Initial Developer of the Original Code is University Health Network. Copyright (C)
013 * 2005. All Rights Reserved.
014 *
015 * Contributor(s): ______________________________________.
016 *
017 * Alternatively, the contents of this file may be used under the terms of the
018 * GNU General Public License (the "GPL"), in which case the provisions of the GPL are
019 * applicable instead of those above. If you wish to allow use of your version of this
020 * file only under the terms of the GPL and not to allow others to use your version
021 * of this file under the MPL, indicate your decision by deleting the provisions above
022 * and replace them with the notice and other provisions required by the GPL License.
023 * If you do not delete the provisions above, a recipient may use your version of
024 * this file under either the MPL or the GPL.
025 *
026 */
027 package ca.uhn.hl7v2.util;
028
029 import java.util.ArrayList;
030 import java.util.HashMap;
031 import java.util.List;
032 import java.util.Map;
033 import java.util.Properties;
034 import java.util.StringTokenizer;
035 import java.util.regex.Matcher;
036 import java.util.regex.Pattern;
037
038 import ca.uhn.hl7v2.HL7Exception;
039 import ca.uhn.hl7v2.model.Message;
040
041 /**
042 * Queries messages in an SQL-like style. We get repeated row-like structures by
043 * looping over repetitions of groups, segments, or fields.
044 *
045 * This is a very advanced class ... maybe too advanced even for you. If you
046 * find it confusing, please note that there are simpler ways to get data from a
047 * message (like calling its getters or using Terser).
048 *
049 * LOOPING: You specify the loop points as part of the query. For example you
050 * could specify loop point x like this: <code>x = /.MSH-18(*)</code>. The * is
051 * replaced by numbers 0, 1, 2, etc. as you loop through the results, so this
052 * example would loop through repetitions of MSH-18. If there are multiple loop
053 * points, the loops are nested so that each possible combination is returned.
054 * Looping stops when none of the fields under a loop point are valued. The name
055 * of the loop point ('x' in the example above) is arbitrary.
056 *
057 * SELECTING FIELDS: The syntax is similar to SQL, except that Terser paths are
058 * used in place of table.field. You can use the "as" keyword to give a field a
059 * name, like this: <code>select /.MSH-7 as msg_date</code>. If your field is
060 * under a loop point, replace the path up to the loop point with a loop point
061 * reference, like this: <code>select {foo}-1 loop foo = /.PID-3(*)</code>
062 *
063 * SELECTING ROWS: A "row" is a combination of all selected fields at one
064 * iteration. You can filter which rows are returned using a where clause
065 * similar to that in SQL. Use exact values or regular expressions, for example:
066 * <code>where {1} like '.*blood.*'</code> or
067 * <code>where {1}/PID-3-1 = '111'</code> Multiple filters can be separated with
068 * commas (which mean 'and'). Future versions may support 'or', negation,
069 * brackets, etc., but this version doesn't.
070 *
071 * FULL EXAMPLE: select {pat-id}-1 as id loop pat-id = ./PID-3(*) where
072 * {pat-id}-2 = 'mrn'
073 *
 * SUBTLETIES OF LOOPING: A loop point can be under another loop point. For
 * example, consider the message:
 *
 * <pre>
 * MSH|etc.|etc.
 * Z01|one~two|a
 * Z01|three~four|b
 * </pre>
 *
 * The query <code>select {a}-2, {b} loop a = /Z01(*), b = {a}-1(*)</code>
 * would return these rows:
 *
 * <pre>
 * a one
 * a two
 * b three
 * b four
 * </pre>
 *
 * While the query
 * <code>select {a}-2, {b} loop a = /Z01(*), b = /Z01(1)-1(*)</code> would
 * return these rows:
 *
 * <pre>
 * a one
 * a two
 * b one
 * b two
 * </pre>
084 *
085 * In the first case, one loop point refers to another. In the second case the
086 * loops are treated as independent, just as if they referred to different
087 * branches of the message.
088 *
089 * TODO: could support distinct easily by keeping record of rows and comparing
090 * each one to previous rows
091 *
092 * @author <a href="mailto:bryan.tripp@uhn.on.ca">Bryan Tripp</a>
093 * @version $Revision: 1.1 $ updated on $Date: 2007-02-19 02:24:27 $ by $Author:
094 * jamesagnew $
095 * @deprecated
096 */
097 public class MessageQuery {
098
099 /**
100 * @param theMessage
101 * an HL7 message from which data are to be queried
102 * @param theQuery
103 * the query (see class docs for syntax)
104 * @return data from the message that are selected by the query
105 */
106 public static Result query(Message theMessage, String theQuery) {
107 Properties clauses = getClauses(theQuery);
108
109 // parse select clause
110 StringTokenizer select = new StringTokenizer(
111 clauses.getProperty("select"), ", ", false);
112 List<String> fieldPaths = new ArrayList<String>(10);
113 Map<String, Integer> names = new HashMap<String, Integer>(10);
114 while (select.hasMoreTokens()) {
115 String token = select.nextToken();
116 if (token.equals("as")) {
117 if (!select.hasMoreTokens()) {
118 throw new IllegalArgumentException(
119 "Keyword 'as' must be followed by a field label");
120 }
121 names.put(select.nextToken(), fieldPaths.size() - 1);
122 } else {
123 fieldPaths.add(token);
124 }
125 }
126
127 // parse loop clause
128 StringTokenizer loop = new StringTokenizer(clauses.getProperty("loop",
129 ""), ",", false);
130 List<String> loopPoints = new ArrayList<String>(10);
131 Map<String, Integer> loopPointNames = new HashMap<String, Integer>(10);
132 while (loop.hasMoreTokens()) {
133 String pointDecl = loop.nextToken();
134 StringTokenizer tok = new StringTokenizer(pointDecl, "=", false);
135 String name = tok.nextToken().trim();
136 String path = tok.nextToken().trim();
137 loopPoints.add(path);
138 loopPointNames.put(name, loopPoints.size() - 1);
139 }
140
141 // parse where clause
142 // TODO: this will do for now but it should really be evaluated like an
143 // expression
144 // rather than a list
145 StringTokenizer where = new StringTokenizer(clauses.getProperty(
146 "where", ""), ",", false);
147 List<String> filters = new ArrayList<String>();
148 while (where.hasMoreTokens()) {
149 filters.add(where.nextToken());
150 }
151 String[] filterPaths = new String[filters.size()];
152 String[] filterPatterns = new String[filters.size()];
153 boolean[] exactFlags = new boolean[filters.size()];
154
155 for (int i = 0; i < filters.size(); i++) {
156 exactFlags[i] = true;
157 String filter = filters.get(i);
158 String[] parts = splitFromEnd(filter, "=");
159 if (parts[1] != null) {
160 parts[1] = parts[1].substring(1);
161 } else {
162 exactFlags[i] = false;
163 parts = splitFromEnd(filter, "like");
164 parts[1] = parts[1].substring(4);
165 }
166 filterPaths[i] = parts[0].trim();
167 parts[1] = parts[1].trim();
168 filterPatterns[i] = parts[1].substring(1, parts[1].length() - 1);
169 }
170
171 return new ResultImpl(theMessage,
172 loopPoints.toArray(new String[0]), loopPointNames,
173 fieldPaths.toArray(new String[0]), names,
174 filterPaths, filterPatterns, exactFlags);
175 }
176
177 private static Properties getClauses(String theQuery) {
178 Properties clauses = new Properties();
179
180 String[] split = splitFromEnd(theQuery, "where ");
181 setClause(clauses, "where", split[1]);
182
183 split = splitFromEnd(split[0], "loop ");
184 setClause(clauses, "loop", split[1]);
185 setClause(clauses, "select", split[0]);
186
187 if (clauses.getProperty("where", "").indexOf("loop ") >= 0) {
188 throw new IllegalArgumentException(
189 "The loop clause must precede the where clause");
190 }
191 if (clauses.getProperty("select") == null) {
192 throw new IllegalArgumentException(
193 "The query must begin with a select clause");
194 }
195 return clauses;
196 }
197
198 private static void setClause(Properties theClauses, String theName,
199 String theClause) {
200 if (theClause != null) {
201 theClauses.setProperty(theName,
202 theClause.substring(theName.length()).trim());
203 }
204 }
205
206 private static String[] splitFromEnd(String theString, String theMarker) {
207 String[] result = new String[2];
208 int begin = theString.indexOf(theMarker);
209 if (begin >= 0) {
210 result[0] = theString.substring(0, begin);
211 result[1] = theString.substring(begin);
212 } else {
213 result[0] = theString;
214 }
215 return result;
216 }
217
218 /**
219 * A result set for a message query.
220 *
221 * @author <a href="mailto:bryan.tripp@uhn.on.ca">Bryan Tripp</a>
222 * @version $Revision: 1.1 $ updated on $Date: 2007-02-19 02:24:27 $ by
223 * $Author: jamesagnew $
224 */
225 public static interface Result {
226
227 /**
228 * @param theFieldNumber
229 * numbered from zero in the order they are specified in the
230 * query
231 * @return the corresponding value in the current row
232 */
233 public String get(int theFieldNumber);
234
235 /**
236 * @param theFieldName
237 * a field name as specified in the query with the keyword
238 * "as"
239 * @return the corresponding value in the current row
240 */
241 public String get(String theFieldName);
242
243 /**
244 * @return a list of named fields as defined with 'as' in the query
245 */
246 public String[] getNamedFields();
247
248 /**
249 * Advances to the next "row" of data if one is available.
250 *
251 * @return true if another row is available
252 * @throws HL7Exception
253 */
254 public boolean next() throws HL7Exception;
255
256 }
257
258 private static class ResultImpl implements Result {
259
260 private Terser myTerser;
261 private String[] myValues;
262 private String[] myLoopPoints;
263 private Map<String, Integer> myLoopPointNames;
264 private String[] myFieldPaths;
265 private Map<String, Integer> myFieldNames;
266 private int[] myIndices;
267 private int[] myNumEmpty; // number of empty sub-loops since last
268 // non-empty one
269 private int[] myMaxNumEmpty;
270 private boolean myNonLoopingQuery = false;
271 private String[] myWherePaths;
272 private String[] myWhereValues;
273 private String[] myWherePatterns;
274 private boolean[] myExactMatchFlags;
275
276 public ResultImpl(Message theMessage, String[] theLoopPoints,
277 Map<String, Integer> theLoopPointNames, String[] theFieldPaths,
278 Map<String, Integer> theFieldNames, String[] theWherePaths,
279 String[] theWherePatterns, boolean[] theExactMatchFlags) {
280
281 myTerser = new Terser(theMessage);
282 myLoopPoints = theLoopPoints;
283 myIndices = new int[theLoopPoints.length];
284 myNumEmpty = new int[theLoopPoints.length];
285 myMaxNumEmpty = getMaxNumEmpty(theLoopPoints);
286 myLoopPointNames = theLoopPointNames;
287 myFieldPaths = theFieldPaths;
288 myValues = new String[theFieldPaths.length];
289 myFieldNames = theFieldNames;
290 myWherePaths = theWherePaths;
291 myWherePatterns = theWherePatterns;
292 myExactMatchFlags = theExactMatchFlags;
293
294 if (theLoopPoints.length == 0) {
295 myNonLoopingQuery = true; // if no loops, give ourselves 1
296 // iteration
297 } else {
298 myIndices[myIndices.length - 1] = -1; // start before 1st
299 // iteration
300 }
301
302 }
303
304 // extracts max number of empty iterations for each loop point (this is
305 // communicated
306 // as an optional integer after the *, e.g. blah(*3) ... default is 0).
307 private int[] getMaxNumEmpty(String[] theLoopPoints) {
308 int[] retVal = new int[theLoopPoints.length];
309 for (int i = 0; i < theLoopPoints.length; i++) {
310 retVal[i] = getMaxNumEmpty(theLoopPoints[i]);
311 }
312 return retVal;
313 }
314
315 private int getMaxNumEmpty(String theLoopPoint) {
316 int retVal = 0; // default
317
318 Matcher m = Pattern.compile("\\*(\\d+)").matcher(theLoopPoint);
319 if (m.find()) {
320 String num = m.group(1);
321 retVal = Integer.parseInt(num);
322 }
323
324 return retVal;
325 }
326
327 // returns true if some field under the given loop point has a value at
328 // the present
329 // iteration
330 private boolean currentRowValued(int theLoopPoint) {
331 for (int i = 0; i < myFieldPaths.length; i++) {
332 if (referencesLoop(myFieldPaths[i], theLoopPoint)) {
333 String value = myValues[i];
334 if (value != null && value.length() > 0) {
335 return true;
336 }
337 }
338 }
339 return false;
340 }
341
342 // returns true if the current row matches the where clause filters
343 private boolean currentRowMatchesFilter() {
344 for (int i = 0; i < myWhereValues.length; i++) {
345 if (myExactMatchFlags[i]) {
346 if (!myWherePatterns[i].equals(myWhereValues[i])) {
347 return false;
348 }
349 } else {
350 if (!Pattern.matches(myWherePatterns[i], myWhereValues[i])) {
351 return false;
352 }
353 }
354 }
355 return true;
356 }
357
358 // true if the given path references the given loop point (directly
359 // or indirectly)
360 private boolean referencesLoop(String theFieldPath, int theLoopPoint) {
361 String path = theFieldPath;
362 int lp;
363 while ((lp = getLoopPointReference(path)) >= 0) {
364 if (lp == theLoopPoint) {
365 return true;
366 } else {
367 path = myLoopPoints[lp];
368 }
369 }
370 return false;
371 }
372
373 // expands a set of paths to their current loop point iterations, and
374 // gets
375 // current values from our message
376 private String[] getCurrentValues(String[] thePaths)
377 throws HL7Exception {
378 String[] paths = composePaths(thePaths);
379 String[] values = new String[paths.length];
380 for (int i = 0; i < paths.length; i++) {
381 values[i] = myTerser.get(paths[i]);
382 if (values[i] == null) {
383 values[i] = "";
384 }
385 }
386 return values;
387 }
388
389 // creates full Terser paths from current location, loop points, and
390 // given paths
391 // with loop point references
392 private String[] composePaths(String[] thePaths) {
393 String[] currentLoopPoints = composeLoopPoints();
394 String[] result = new String[thePaths.length];
395 for (int i = 0; i < thePaths.length; i++) {
396 result[i] = thePaths[i];
397 int ref = getLoopPointReference(thePaths[i]);
398 if (ref >= 0) {
399 result[i] = expandLoopPointReference(result[i],
400 currentLoopPoints[ref]);
401 }
402 }
403 return result;
404 }
405
406 // parameterizes loop points with present location (i.e. replaces * with
407 // current
408 // indices)
409 private String[] composeLoopPoints() {
410 String[] result = new String[myLoopPoints.length];
411 for (int i = 0; i < myLoopPoints.length; i++) {
412 result[i] = myLoopPoints[i].replaceAll("\\*\\d*",
413 String.valueOf(myIndices[i]));
414
415 int ref = getLoopPointReference(myLoopPoints[i]);
416 if (ref >= i) {
417 throw new IllegalStateException(
418 "Loop point must be defined after the "
419 + "one it references: " + myLoopPoints[i]);
420 } else if (ref >= 0) {
421 result[i] = expandLoopPointReference(result[i], result[ref]);
422 }
423 }
424 return result;
425 }
426
427 // extracts LP# of label between first '{' and first '}', or -1 if there
428 // isn't one
429 private int getLoopPointReference(String thePath) {
430 StringTokenizer tok = new StringTokenizer(thePath, "{}", false);
431 if (thePath.indexOf('{') >= 0 && tok.hasMoreTokens()) {
432 String ref = tok.nextToken();
433 return myLoopPointNames.get(ref);
434 } else {
435 return -1;
436 }
437 }
438
439 private String expandLoopPointReference(String thePath,
440 String theLoopPoint) {
441 return thePath.replaceAll("\\{.*\\}", theLoopPoint);
442 }
443
444 /**
445 * @see ca.uhn.hl7v2.util.MessageQuery.Result#get(int)
446 */
447 public String get(int theFieldNumber) {
448 if (theFieldNumber < 0 || theFieldNumber >= myValues.length) {
449 throw new IllegalArgumentException(
450 "Field number must be between 0 and "
451 + (myValues.length - 1));
452 }
453 return myValues[theFieldNumber];
454 }
455
456 /**
457 * @see ca.uhn.hl7v2.util.MessageQuery.Result#get(java.lang.String)
458 */
459 public String get(String theFieldName) {
460 Integer fieldNum = myFieldNames.get(theFieldName);
461 if (fieldNum == null) {
462 throw new IllegalArgumentException(
463 "Field name not recognized: " + theFieldName);
464 }
465 return get(fieldNum);
466 }
467
468 /**
469 * @throws HL7Exception
470 * @see ca.uhn.hl7v2.util.MessageQuery.Result#next()
471 */
472 public boolean next() throws HL7Exception {
473 if (myNonLoopingQuery) {
474 myNonLoopingQuery = false;
475 myValues = getCurrentValues(myFieldPaths);
476 myWhereValues = getCurrentValues(myWherePaths);
477 return currentRowMatchesFilter();
478 }
479
480 boolean hasNext = false;
481 findNext: for (int i = myIndices.length - 1; i >= 0; i--) {
482 boolean gotMatch = false;
483 while (!gotMatch && myNumEmpty[i] <= myMaxNumEmpty[i]) {
484 myIndices[i]++;
485 myValues = getCurrentValues(myFieldPaths);
486 myWhereValues = getCurrentValues(myWherePaths);
487
488 if (!currentRowValued(i)) {
489 myNumEmpty[i]++;
490 } else {
491 myNumEmpty[i] = 0;
492 }
493 if (currentRowMatchesFilter()) {
494 gotMatch = true;
495 }
496 }
497
498 hasNext = myNumEmpty[i] <= myMaxNumEmpty[i];// &&
499 // currentRowMatchesFilter();
500 if (hasNext) {
501 break findNext;
502 }
503
504 myIndices[i] = 0;
505 myNumEmpty[i] = 0;
506
507 // TODO: if we aren't allowing empties in this loop, and have no
508 // value, we want to
509 // return the null in the super-loop. However, we don't know
510 // which loop point, if
511 // any, is the super-loop. If it was the next one we could do
512 // this ...
513 // if (i > 0 && myMaxNumEmpty[i] == 0 && myMaxNumEmpty[i-1] > 0
514 // && myIndices[i-1] == 0) {
515 // myIndices[i-1] = -1;
516 // } ... but it may not be, so we'll ignore this problem for
517 // now.
518 }
519 return hasNext;
520 }
521
522 /**
523 * @see ca.uhn.hl7v2.util.MessageQuery.Result#getNamedFields()
524 */
525 public String[] getNamedFields() {
526 return myFieldNames.keySet().toArray(new String[0]);
527 }
528
529 }
530
531 }