001    /**
002     * The contents of this file are subject to the Mozilla Public License Version 1.1
003     * (the "License"); you may not use this file except in compliance with the License.
004     * You may obtain a copy of the License at http://www.mozilla.org/MPL/
005     * Software distributed under the License is distributed on an "AS IS" basis,
006     * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the
007     * specific language governing rights and limitations under the License.
008     *
009     * The Original Code is "MessageQuery.java".  Description:
010     * "Queries messages in an SQL-like style.  "
011     *
012     * The Initial Developer of the Original Code is University Health Network. Copyright (C)
013     * 2005.  All Rights Reserved.
014     *
015     * Contributor(s): ______________________________________.
016     *
017     * Alternatively, the contents of this file may be used under the terms of the
018     * GNU General Public License (the "GPL"), in which case the provisions of the GPL are
019     * applicable instead of those above.  If you wish to allow use of your version of this
020     * file only under the terms of the GPL and not to allow others to use your version
021     * of this file under the MPL, indicate your decision by deleting  the provisions above
022     * and replace  them with the notice and other provisions required by the GPL License.
023     * If you do not delete the provisions above, a recipient may use your version of
024     * this file under either the MPL or the GPL.
025     *
026     */
027    package ca.uhn.hl7v2.util;
028    
029    import java.util.ArrayList;
030    import java.util.HashMap;
031    import java.util.List;
032    import java.util.Map;
033    import java.util.Properties;
034    import java.util.StringTokenizer;
035    import java.util.regex.Matcher;
036    import java.util.regex.Pattern;
037    
038    import ca.uhn.hl7v2.HL7Exception;
039    import ca.uhn.hl7v2.model.Message;
040    
041    /**
042     * Queries messages in an SQL-like style. We get repeated row-like structures by
043     * looping over repetitions of groups, segments, or fields.
044     * 
045     * This is a very advanced class ... maybe too advanced even for you. If you
046     * find it confusing, please note that there are simpler ways to get data from a
047     * message (like calling its getters or using Terser).
048     * 
049     * LOOPING: You specify the loop points as part of the query. For example you
050     * could specify loop point x like this: <code>x = /.MSH-18(*)</code>. The * is
051     * replaced by numbers 0, 1, 2, etc. as you loop through the results, so this
052     * example would loop through repetitions of MSH-18. If there are multiple loop
053     * points, the loops are nested so that each possible combination is returned.
054     * Looping stops when none of the fields under a loop point are valued. The name
055     * of the loop point ('x' in the example above) is arbitrary.
056     * 
057     * SELECTING FIELDS: The syntax is similar to SQL, except that Terser paths are
058     * used in place of table.field. You can use the "as" keyword to give a field a
059     * name, like this: <code>select /.MSH-7 as msg_date</code>. If your field is
060     * under a loop point, replace the path up to the loop point with a loop point
061     * reference, like this: <code>select {foo}-1 loop foo = /.PID-3(*)</code>
062     * 
063     * SELECTING ROWS: A "row" is a combination of all selected fields at one
064     * iteration. You can filter which rows are returned using a where clause
065     * similar to that in SQL. Use exact values or regular expressions, for example:
066     * <code>where {1} like '.*blood.*'</code> or
067     * <code>where {1}/PID-3-1 = '111'</code> Multiple filters can be separated with
068     * commas (which mean 'and'). Future versions may support 'or', negation,
069     * brackets, etc., but this version doesn't.
070     * 
071     * FULL EXAMPLE: select {pat-id}-1 as id loop pat-id = ./PID-3(*) where
072     * {pat-id}-2 = 'mrn'
073     * 
074     * SUBTLETIES OF LOOPING: A loop point can be under another loop point. For
075     * example consider the message:
076     * 
077     * MSH|etc.|etc. Z01|one~two|a Z01|three~four|b
078     * 
079     * The query, "select {a}-2, {b} loop a = /Z01(*), b = {a}-1(*)" would return: a
080     * one a two b three b four
081     * 
082     * While the query "select {a}-2, {b} loop a = /Z01(*), b = /Z01(1)-1(*)" would
083     * return: a one a two b one b two
084     * 
085     * In the first case, one loop point refers to another. In the second case the
086     * loops are treated as independent, just as if they referred to different
087     * branches of the message.
088     * 
089     * TODO: could support distinct easily by keeping record of rows and comparing
090     * each one to previous rows
091     * 
092     * @author <a href="mailto:bryan.tripp@uhn.on.ca">Bryan Tripp</a>
093     * @version $Revision: 1.1 $ updated on $Date: 2007-02-19 02:24:27 $ by $Author:
094     *          jamesagnew $
095     * @deprecated
096     */
097    public class MessageQuery {
098    
099            /**
100             * @param theMessage
101             *            an HL7 message from which data are to be queried
102             * @param theQuery
103             *            the query (see class docs for syntax)
104             * @return data from the message that are selected by the query
105             */
106            public static Result query(Message theMessage, String theQuery) {
107                    Properties clauses = getClauses(theQuery);
108    
109                    // parse select clause
110                    StringTokenizer select = new StringTokenizer(
111                                    clauses.getProperty("select"), ", ", false);
112                    List<String> fieldPaths = new ArrayList<String>(10);
113                    Map<String, Integer> names = new HashMap<String, Integer>(10);
114                    while (select.hasMoreTokens()) {
115                            String token = select.nextToken();
116                            if (token.equals("as")) {
117                                    if (!select.hasMoreTokens()) {
118                                            throw new IllegalArgumentException(
119                                                            "Keyword 'as' must be followed by a field label");
120                                    }
121                                    names.put(select.nextToken(), fieldPaths.size() - 1);
122                            } else {
123                                    fieldPaths.add(token);
124                            }
125                    }
126    
127                    // parse loop clause
128                    StringTokenizer loop = new StringTokenizer(clauses.getProperty("loop",
129                                    ""), ",", false);
130                    List<String> loopPoints = new ArrayList<String>(10);
131                    Map<String, Integer> loopPointNames = new HashMap<String, Integer>(10);
132                    while (loop.hasMoreTokens()) {
133                            String pointDecl = loop.nextToken();
134                            StringTokenizer tok = new StringTokenizer(pointDecl, "=", false);
135                            String name = tok.nextToken().trim();
136                            String path = tok.nextToken().trim();
137                            loopPoints.add(path);
138                            loopPointNames.put(name, loopPoints.size() - 1);
139                    }
140    
141                    // parse where clause
142                    // TODO: this will do for now but it should really be evaluated like an
143                    // expression
144                    // rather than a list
145                    StringTokenizer where = new StringTokenizer(clauses.getProperty(
146                                    "where", ""), ",", false);
147                    List<String> filters = new ArrayList<String>();
148                    while (where.hasMoreTokens()) {
149                            filters.add(where.nextToken());
150                    }
151                    String[] filterPaths = new String[filters.size()];
152                    String[] filterPatterns = new String[filters.size()];
153                    boolean[] exactFlags = new boolean[filters.size()];
154    
155                    for (int i = 0; i < filters.size(); i++) {
156                            exactFlags[i] = true;
157                            String filter = filters.get(i);
158                            String[] parts = splitFromEnd(filter, "=");
159                            if (parts[1] != null) {
160                                    parts[1] = parts[1].substring(1);
161                            } else {
162                                    exactFlags[i] = false;
163                                    parts = splitFromEnd(filter, "like");
164                                    parts[1] = parts[1].substring(4);
165                            }
166                            filterPaths[i] = parts[0].trim();
167                            parts[1] = parts[1].trim();
168                            filterPatterns[i] = parts[1].substring(1, parts[1].length() - 1);
169                    }
170    
171                    return new ResultImpl(theMessage,
172                                    loopPoints.toArray(new String[0]), loopPointNames,
173                                    fieldPaths.toArray(new String[0]), names,
174                                    filterPaths, filterPatterns, exactFlags);
175            }
176    
177            private static Properties getClauses(String theQuery) {
178                    Properties clauses = new Properties();
179    
180                    String[] split = splitFromEnd(theQuery, "where ");
181                    setClause(clauses, "where", split[1]);
182    
183                    split = splitFromEnd(split[0], "loop ");
184                    setClause(clauses, "loop", split[1]);
185                    setClause(clauses, "select", split[0]);
186    
187                    if (clauses.getProperty("where", "").indexOf("loop ") >= 0) {
188                            throw new IllegalArgumentException(
189                                            "The loop clause must precede the where clause");
190                    }
191                    if (clauses.getProperty("select") == null) {
192                            throw new IllegalArgumentException(
193                                            "The query must begin with a select clause");
194                    }
195                    return clauses;
196            }
197    
198            private static void setClause(Properties theClauses, String theName,
199                            String theClause) {
200                    if (theClause != null) {
201                            theClauses.setProperty(theName,
202                                            theClause.substring(theName.length()).trim());
203                    }
204            }
205    
206            private static String[] splitFromEnd(String theString, String theMarker) {
207                    String[] result = new String[2];
208                    int begin = theString.indexOf(theMarker);
209                    if (begin >= 0) {
210                            result[0] = theString.substring(0, begin);
211                            result[1] = theString.substring(begin);
212                    } else {
213                            result[0] = theString;
214                    }
215                    return result;
216            }
217    
218            /**
219             * A result set for a message query.
220             * 
221             * @author <a href="mailto:bryan.tripp@uhn.on.ca">Bryan Tripp</a>
222             * @version $Revision: 1.1 $ updated on $Date: 2007-02-19 02:24:27 $ by
223             *          $Author: jamesagnew $
224             */
225            public static interface Result {
226    
227                    /**
228                     * @param theFieldNumber
229                     *            numbered from zero in the order they are specified in the
230                     *            query
231                     * @return the corresponding value in the current row
232                     */
233                    public String get(int theFieldNumber);
234    
235                    /**
236                     * @param theFieldName
237                     *            a field name as specified in the query with the keyword
238                     *            "as"
239                     * @return the corresponding value in the current row
240                     */
241                    public String get(String theFieldName);
242    
243                    /**
244                     * @return a list of named fields as defined with 'as' in the query
245                     */
246                    public String[] getNamedFields();
247    
248                    /**
249                     * Advances to the next "row" of data if one is available.
250                     * 
251                     * @return true if another row is available
252                     * @throws HL7Exception
253                     */
254                    public boolean next() throws HL7Exception;
255    
256            }
257    
258            private static class ResultImpl implements Result {
259    
260                    private Terser myTerser;
261                    private String[] myValues;
262                    private String[] myLoopPoints;
263                    private Map<String, Integer> myLoopPointNames;
264                    private String[] myFieldPaths;
265                    private Map<String, Integer> myFieldNames;
266                    private int[] myIndices;
267                    private int[] myNumEmpty; // number of empty sub-loops since last
268                                                                            // non-empty one
269                    private int[] myMaxNumEmpty;
270                    private boolean myNonLoopingQuery = false;
271                    private String[] myWherePaths;
272                    private String[] myWhereValues;
273                    private String[] myWherePatterns;
274                    private boolean[] myExactMatchFlags;
275    
276                    public ResultImpl(Message theMessage, String[] theLoopPoints,
277                                    Map<String, Integer> theLoopPointNames, String[] theFieldPaths,
278                                    Map<String, Integer> theFieldNames, String[] theWherePaths,
279                                    String[] theWherePatterns, boolean[] theExactMatchFlags) {
280    
281                            myTerser = new Terser(theMessage);
282                            myLoopPoints = theLoopPoints;
283                            myIndices = new int[theLoopPoints.length];
284                            myNumEmpty = new int[theLoopPoints.length];
285                            myMaxNumEmpty = getMaxNumEmpty(theLoopPoints);
286                            myLoopPointNames = theLoopPointNames;
287                            myFieldPaths = theFieldPaths;
288                            myValues = new String[theFieldPaths.length];
289                            myFieldNames = theFieldNames;
290                            myWherePaths = theWherePaths;
291                            myWherePatterns = theWherePatterns;
292                            myExactMatchFlags = theExactMatchFlags;
293    
294                            if (theLoopPoints.length == 0) {
295                                    myNonLoopingQuery = true; // if no loops, give ourselves 1
296                                                                                            // iteration
297                            } else {
298                                    myIndices[myIndices.length - 1] = -1; // start before 1st
299                                                                                                                    // iteration
300                            }
301    
302                    }
303    
304                    // extracts max number of empty iterations for each loop point (this is
305                    // communicated
306                    // as an optional integer after the *, e.g. blah(*3) ... default is 0).
307                    private int[] getMaxNumEmpty(String[] theLoopPoints) {
308                            int[] retVal = new int[theLoopPoints.length];
309                            for (int i = 0; i < theLoopPoints.length; i++) {
310                                    retVal[i] = getMaxNumEmpty(theLoopPoints[i]);
311                            }
312                            return retVal;
313                    }
314    
315                    private int getMaxNumEmpty(String theLoopPoint) {
316                            int retVal = 0; // default
317    
318                            Matcher m = Pattern.compile("\\*(\\d+)").matcher(theLoopPoint);
319                            if (m.find()) {
320                                    String num = m.group(1);
321                                    retVal = Integer.parseInt(num);
322                            }
323    
324                            return retVal;
325                    }
326    
327                    // returns true if some field under the given loop point has a value at
328                    // the present
329                    // iteration
330                    private boolean currentRowValued(int theLoopPoint) {
331                            for (int i = 0; i < myFieldPaths.length; i++) {
332                                    if (referencesLoop(myFieldPaths[i], theLoopPoint)) {
333                                            String value = myValues[i];
334                                            if (value != null && value.length() > 0) {
335                                                    return true;
336                                            }
337                                    }
338                            }
339                            return false;
340                    }
341    
342                    // returns true if the current row matches the where clause filters
343                    private boolean currentRowMatchesFilter() {
344                            for (int i = 0; i < myWhereValues.length; i++) {
345                                    if (myExactMatchFlags[i]) {
346                                            if (!myWherePatterns[i].equals(myWhereValues[i])) {
347                                                    return false;
348                                            }
349                                    } else {
350                                            if (!Pattern.matches(myWherePatterns[i], myWhereValues[i])) {
351                                                    return false;
352                                            }
353                                    }
354                            }
355                            return true;
356                    }
357    
358                    // true if the given path references the given loop point (directly
359                    // or indirectly)
360                    private boolean referencesLoop(String theFieldPath, int theLoopPoint) {
361                            String path = theFieldPath;
362                            int lp;
363                            while ((lp = getLoopPointReference(path)) >= 0) {
364                                    if (lp == theLoopPoint) {
365                                            return true;
366                                    } else {
367                                            path = myLoopPoints[lp];
368                                    }
369                            }
370                            return false;
371                    }
372    
373                    // expands a set of paths to their current loop point iterations, and
374                    // gets
375                    // current values from our message
376                    private String[] getCurrentValues(String[] thePaths)
377                                    throws HL7Exception {
378                            String[] paths = composePaths(thePaths);
379                            String[] values = new String[paths.length];
380                            for (int i = 0; i < paths.length; i++) {
381                                    values[i] = myTerser.get(paths[i]);
382                                    if (values[i] == null) {
383                                            values[i] = "";
384                                    }
385                            }
386                            return values;
387                    }
388    
389                    // creates full Terser paths from current location, loop points, and
390                    // given paths
391                    // with loop point references
392                    private String[] composePaths(String[] thePaths) {
393                            String[] currentLoopPoints = composeLoopPoints();
394                            String[] result = new String[thePaths.length];
395                            for (int i = 0; i < thePaths.length; i++) {
396                                    result[i] = thePaths[i];
397                                    int ref = getLoopPointReference(thePaths[i]);
398                                    if (ref >= 0) {
399                                            result[i] = expandLoopPointReference(result[i],
400                                                            currentLoopPoints[ref]);
401                                    }
402                            }
403                            return result;
404                    }
405    
406                    // parameterizes loop points with present location (i.e. replaces * with
407                    // current
408                    // indices)
409                    private String[] composeLoopPoints() {
410                            String[] result = new String[myLoopPoints.length];
411                            for (int i = 0; i < myLoopPoints.length; i++) {
412                                    result[i] = myLoopPoints[i].replaceAll("\\*\\d*",
413                                                    String.valueOf(myIndices[i]));
414    
415                                    int ref = getLoopPointReference(myLoopPoints[i]);
416                                    if (ref >= i) {
417                                            throw new IllegalStateException(
418                                                            "Loop point must be defined after the "
419                                                                            + "one it references: " + myLoopPoints[i]);
420                                    } else if (ref >= 0) {
421                                            result[i] = expandLoopPointReference(result[i], result[ref]);
422                                    }
423                            }
424                            return result;
425                    }
426    
427                    // extracts LP# of label between first '{' and first '}', or -1 if there
428                    // isn't one
429                    private int getLoopPointReference(String thePath) {
430                            StringTokenizer tok = new StringTokenizer(thePath, "{}", false);
431                            if (thePath.indexOf('{') >= 0 && tok.hasMoreTokens()) {
432                                    String ref = tok.nextToken();
433                                    return myLoopPointNames.get(ref);
434                            } else {
435                                    return -1;
436                            }
437                    }
438    
439                    private String expandLoopPointReference(String thePath,
440                                    String theLoopPoint) {
441                            return thePath.replaceAll("\\{.*\\}", theLoopPoint);
442                    }
443    
444                    /**
445                     * @see ca.uhn.hl7v2.util.MessageQuery.Result#get(int)
446                     */
447                    public String get(int theFieldNumber) {
448                            if (theFieldNumber < 0 || theFieldNumber >= myValues.length) {
449                                    throw new IllegalArgumentException(
450                                                    "Field number must be between 0 and "
451                                                                    + (myValues.length - 1));
452                            }
453                            return myValues[theFieldNumber];
454                    }
455    
456                    /**
457                     * @see ca.uhn.hl7v2.util.MessageQuery.Result#get(java.lang.String)
458                     */
459                    public String get(String theFieldName) {
460                            Integer fieldNum = myFieldNames.get(theFieldName);
461                            if (fieldNum == null) {
462                                    throw new IllegalArgumentException(
463                                                    "Field name not recognized: " + theFieldName);
464                            }
465                            return get(fieldNum);
466                    }
467    
468                    /**
469                     * @throws HL7Exception
470                     * @see ca.uhn.hl7v2.util.MessageQuery.Result#next()
471                     */
472                    public boolean next() throws HL7Exception {
473                            if (myNonLoopingQuery) {
474                                    myNonLoopingQuery = false;
475                                    myValues = getCurrentValues(myFieldPaths);
476                                    myWhereValues = getCurrentValues(myWherePaths);
477                                    return currentRowMatchesFilter();
478                            }
479    
480                            boolean hasNext = false;
481                            findNext: for (int i = myIndices.length - 1; i >= 0; i--) {
482                                    boolean gotMatch = false;
483                                    while (!gotMatch && myNumEmpty[i] <= myMaxNumEmpty[i]) {
484                                            myIndices[i]++;
485                                            myValues = getCurrentValues(myFieldPaths);
486                                            myWhereValues = getCurrentValues(myWherePaths);
487    
488                                            if (!currentRowValued(i)) {
489                                                    myNumEmpty[i]++;
490                                            } else {
491                                                    myNumEmpty[i] = 0;
492                                            }
493                                            if (currentRowMatchesFilter()) {
494                                                    gotMatch = true;
495                                            }
496                                    }
497    
498                                    hasNext = myNumEmpty[i] <= myMaxNumEmpty[i];// &&
499                                                                                                                            // currentRowMatchesFilter();
500                                    if (hasNext) {
501                                            break findNext;
502                                    }
503    
504                                    myIndices[i] = 0;
505                                    myNumEmpty[i] = 0;
506    
507                                    // TODO: if we aren't allowing empties in this loop, and have no
508                                    // value, we want to
509                                    // return the null in the super-loop. However, we don't know
510                                    // which loop point, if
511                                    // any, is the super-loop. If it was the next one we could do
512                                    // this ...
513                                    // if (i > 0 && myMaxNumEmpty[i] == 0 && myMaxNumEmpty[i-1] > 0
514                                    // && myIndices[i-1] == 0) {
515                                    // myIndices[i-1] = -1;
516                                    // } ... but it may not be, so we'll ignore this problem for
517                                    // now.
518                            }
519                            return hasNext;
520                    }
521    
522                    /**
523                     * @see ca.uhn.hl7v2.util.MessageQuery.Result#getNamedFields()
524                     */
525                    public String[] getNamedFields() {
526                            return myFieldNames.keySet().toArray(new String[0]);
527                    }
528    
529            }
530    
531    }