001/*
002 * Units of Measurement Systems
003 * Copyright (c) 2005-2021, Jean-Marie Dautelle, Werner Keil and others.
004 *
005 * All rights reserved.
006 *
007 * Redistribution and use in source and binary forms, with or without modification,
008 * are permitted provided that the following conditions are met:
009 *
010 * 1. Redistributions of source code must retain the above copyright notice,
011 *    this list of conditions and the following disclaimer.
012 *
013 * 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions
014 *    and the following disclaimer in the documentation and/or other materials provided with the distribution.
015 *
016 * 3. Neither the name of JSR-385, Units of Measurement nor the names of their contributors may be used to
017 *    endorse or promote products derived from this software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
020 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
021 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
022 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
023 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
026 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
028 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package systems.uom.ucum.format;
031
032import java.io.ByteArrayInputStream;
033import java.io.IOException;
034import java.text.ParsePosition;
035import java.util.Arrays;
036import java.util.LinkedHashMap;
037import java.util.List;
038import java.util.Locale;
039import java.util.Map;
040import java.util.Map.Entry;
041import java.util.ResourceBundle;
042
043import javax.measure.MetricPrefix;
044import javax.measure.Quantity;
045import javax.measure.Unit;
046import javax.measure.UnitConverter;
047import javax.measure.format.MeasurementParseException;
048
049import static systems.uom.ucum.format.UCUMConverterFormatter.formatConverter;
050import static tech.units.indriya.AbstractUnit.ONE;
051
052import si.uom.SI;
053import systems.uom.ucum.format.UCUMFormatHelper.SymbolProvider;
054import systems.uom.ucum.internal.format.UCUMFormatParser;
055import tech.units.indriya.AbstractUnit;
056import tech.units.indriya.format.AbstractUnitFormat;
057import tech.units.indriya.format.SymbolMap;
058import tech.units.indriya.format.TokenException;
059import tech.units.indriya.format.TokenMgrError;
060import tech.units.indriya.function.MultiplyConverter;
061import tech.units.indriya.unit.TransformedUnit;
062
063/**
064 * <p>
065 * This class provides the interface for formatting and parsing {@link Unit units} according to the
 * <a href="http://unitsofmeasure.org/">Unified Code for Units of Measure</a> (UCUM).
067 * </p>
068 *
069 * <p>
070 * For a technical/historical overview of this format please read <a href="http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=61354">
 * Units of Measure in Clinical Information Systems</a>.
072 * </p>
073 *
074 * <p>
075 * As of revision 1.16, the BNF in the UCUM standard contains an <a href="http://unitsofmeasure.org/ticket/4">error</a>. I've attempted to work around
076 * the problem by modifying the BNF productions for &lt;Term&gt;. Once the error in the standard is corrected, it may be necessary to modify the
077 * productions in the UCUMFormatParser.jj file to conform to the standard.
078 * </p>
079 *
080 * @author <a href="mailto:eric-r@northwestern.edu">Eric Russell</a>
081 * @author <a href="mailto:werner@uom.systems">Werner Keil</a>
082 * @author Andi Huber
083 * @version 2.3, 7 December 2020
084 */
085public abstract class UCUMFormat extends AbstractUnitFormat {
086    /**
087     * 
088     */
089    // private static final long serialVersionUID = 8586656823290135155L;
090
091    // A helper to declare bundle names for all instances
092    private static final String BUNDLE_BASE = UCUMFormat.class.getName();
093
094    // /////////////////
095    // Class methods //
096    // /////////////////
097
098    /**
099     * Returns the instance for formatting/parsing using the given variant
100     * 
101     * @param variant
102     *            the <strong>UCUM</strong> variant to use
103     * @return a {@link UCUMFormat} instance
104     */
105    public static UCUMFormat getInstance(Variant variant) {
106        switch (variant) {
107            case CASE_INSENSITIVE:
108                return Parsing.DEFAULT_CI;
109            case CASE_SENSITIVE:
110                return Parsing.DEFAULT_CS;
111            case PRINT:
112                return Print.DEFAULT;
113            default:
114                throw new IllegalArgumentException("Unknown variant: " + variant);
115        }
116    }
117
118    /**
119     * Returns an instance for formatting and parsing using user defined symbols
120     * 
121     * @param variant
122     *            the <strong>UCUM</strong> variant to use
123     * @param symbolMap
124     *            the map of user defined symbols to use
125     * @return a {@link UCUMFormat} instance
126     */
127    public static UCUMFormat getInstance(Variant variant, SymbolMap symbolMap) {
128        switch (variant) {
129            case CASE_INSENSITIVE:
130                return new Parsing(symbolMap, false);
131            case CASE_SENSITIVE:
132                return new Parsing(symbolMap, true);
133            case PRINT:
134                return new Print(symbolMap);
135            default:
136                throw new IllegalArgumentException("Unknown variant: " + variant);
137        }
138    }
139
140    /**
141     * The symbol map used by this instance to map between {@link AbstractUnit Unit}s and <code>String</code>s.
142     */
143    final SymbolMap symbolMap;
144
145    /**
146     * Get the symbol map used by this instance to map between {@link AbstractUnit Unit}s and <code>String</code>s, etc...
147     * 
148     * @return SymbolMap the current symbol map
149     */    
150    protected SymbolMap getSymbols() {
151        return symbolMap;
152    }
153
154    //////////////////
155    // Constructors //
156    //////////////////
157    /**
158     * Base constructor.
159     */
160    UCUMFormat(SymbolMap symbolMap) {
161        this.symbolMap = symbolMap;
162    }
163
164    /////////////
165    // Parsing //
166    /////////////
167    public abstract Unit<? extends Quantity<?>> parse(CharSequence csq, ParsePosition cursor) throws MeasurementParseException;
168
169    protected Unit<?> parse(CharSequence csq, int index) throws MeasurementParseException {
170        return parse(csq, new ParsePosition(index));
171    }
172
173    @Override
174    public abstract Unit<? extends Quantity<?>> parse(CharSequence csq) throws MeasurementParseException;
175
176    ////////////////
177    // Formatting //
178    ////////////////
179    @SuppressWarnings({ "rawtypes" })
180    public Appendable format(final Unit<?> unknownUnit, Appendable appendable) throws IOException {
181        
182        if (!(unknownUnit instanceof AbstractUnit)) {
183            throw new UnsupportedOperationException("The UCUM format supports only known units (Comparable units)");
184        }
185        
186        final AbstractUnit unit = (AbstractUnit) unknownUnit;
187        final UCUMFormatHelper formatHelper = UCUMFormatHelper.of(this, unit);
188        final CharSequence symbol = formatHelper.findSymbolFor(symbolProviders, unit);
189        
190        if (symbol == null) {
191            throw new IllegalArgumentException("Cannot format the given Object as UCUM units (unsupported unit " + unit.getClass().getName() + "). "
192                    + "Custom units types should override the toString() method as the default implementation uses the UCUM format.");
193        }
194
195        appendable.append(symbol);
196        formatHelper.appendAnnotation(symbol, appendable);
197
198        return appendable;
199    }
200    
201    // -- SYMBOL PROVIDERS
202    
203    /* processed in order of declaration, the first to return a non-null string wins */
204    private final SymbolProvider[] symbolProviders = {
205            this::symbolFromLookupMap,
206            this::symbolForTransformedUnit,
207            this::symbolForKilogram,
208            this::symbolForProductUnits,
209            this::symbolForNonSystemUnit,
210            this::symbolFromField,
211            };
212    
213    private CharSequence symbolFromLookupMap(AbstractUnit<?> unit) throws IOException {
214        return symbolMap.getSymbol(unit);
215    }
216    
217    private CharSequence symbolFromField(AbstractUnit<?> unit) throws IOException {
218        return unit.getSymbol();
219    }
220    
221    @SuppressWarnings({ "unchecked", "rawtypes" })
222    private CharSequence symbolForTransformedUnit(AbstractUnit unit) throws IOException {
223        if (!(unit instanceof TransformedUnit)) {
224            return null;    
225        }
226        final StringBuilder sb = new StringBuilder();
227        final Unit<?> parentUnit = ((TransformedUnit) unit).getParentUnit();
228        final UnitConverter converter = 
229                UCUMFormatHelper.toKnownPrefixConverterIfPossible(unit.getConverterTo(parentUnit));
230        final boolean printSeparator = !ONE.equals(parentUnit);
231
232        if (printSeparator && converter instanceof MultiplyConverter) { // workaround for #166
233                format(parentUnit, sb);
234        }
235        formatConverter(converter, printSeparator, sb, symbolMap);
236
237        return sb;
238    }
239    
240    @SuppressWarnings({ "unchecked", "rawtypes" })
241    private CharSequence symbolForProductUnits(AbstractUnit unit) throws IOException {
242        final Map<? extends AbstractUnit<?>, Integer> productUnits = unit.getBaseUnits();
243        
244        if (productUnits == null) {
245            return null;
246        }
247        
248        final StringBuilder sb = new StringBuilder();
249        final Map<AbstractUnit<?>, Integer> numeratorUnits = new LinkedHashMap<>();            
250        final Map<AbstractUnit<?>, Integer> denominatorUnits = new LinkedHashMap<>();
251
252        // divide units into numerators and denominators
253        for (Entry<? extends AbstractUnit<?>, Integer> u : productUnits.entrySet()) {
254            if (u.getValue() > 0) {
255                numeratorUnits.put(u.getKey(), u.getValue());
256            }else {
257                denominatorUnits.put(u.getKey(), u.getValue());
258            }
259        }
260        
261        int numeratorCount = 1;
262        for (Entry<? extends AbstractUnit<?>, Integer> u : numeratorUnits.entrySet()) {
263            // add multiplication separators after first unit
264            if (numeratorCount > 1){
265                sb.append(".");
266            }
267            // add individual unit string
268            format(u.getKey(),sb);
269            // add power number if greater than 1
270            if (u.getValue() > 1){
271                sb.append(u.getValue());
272            }
273            numeratorCount++;
274        }
275        // special case if there is no numerator append one for inverse
276        if (numeratorCount == 1) {
277            sb.append("1");
278        }
279        if (denominatorUnits.size() > 0){
280            // append division symbol
281            sb.append("/");
282            int denominatorCount = 1;
283            for (Entry<? extends AbstractUnit<?>, Integer> u : denominatorUnits.entrySet()) {
284                // if there is more than one denominator unit and this is the first, add open parenthesis 
285                if (denominatorCount == 1 && denominatorUnits.size() > 1 ) {
286                    sb.append("(");
287                }
288                // add multiplication separators after first unit
289                if (denominatorCount > 1){
290                    sb.append(".");
291                }
292                // add individual unit string
293                format(u.getKey(),sb);
294                // add power number if abs greater than 1
295                if (u.getValue() < -1){
296                    sb.append(-u.getValue());
297                }
298                // if there is more than one denominator unit and this is the last, add close parenthesis
299                if (denominatorCount == denominatorUnits.size() && denominatorUnits.size() > 1 ) {
300                    sb.append(")");
301                }
302                denominatorCount++;
303            }
304        }            
305        return sb;
306    }
307    
308    @SuppressWarnings({ "unchecked", "rawtypes" })
309    private CharSequence symbolForKilogram(AbstractUnit unit) throws IOException {
310        
311        final Unit<?> systemUnit = unit.getSystemUnit();
312        if (!systemUnit.equals(SI.KILOGRAM)) {
313            return null;
314        }
315
316        final UnitConverter converter = 
317                UCUMFormatHelper.toKnownPrefixConverterIfPossible(
318                        unit.getConverterTo(systemUnit)
319                        .concatenate(MultiplyConverter.ofPrefix(MetricPrefix.KILO)));
320        
321        final StringBuilder sb = new StringBuilder();
322        final boolean printSeparator = true;
323        
324        // A special case because KILOGRAM is a BaseUnit instead of
325        // a transformed unit, for compatibility with existing SI
326        // unit system.
327        format(SI.GRAM, sb);
328        formatConverter(converter, printSeparator, sb, symbolMap);    
329        
330        return sb;
331    }
332    
333    @SuppressWarnings({ "unchecked", "rawtypes" })
334    private CharSequence symbolForNonSystemUnit(AbstractUnit unit) throws IOException {
335        
336        if (unit.isSystemUnit()) {
337            return null;
338        }
339        
340        final Unit<?> parentUnit = unit.getSystemUnit();
341        final UnitConverter converter = unit.getConverterTo(parentUnit);
342        final StringBuilder sb = new StringBuilder();
343        final boolean printSeparator = !parentUnit.equals(ONE);
344        
345        format(parentUnit, sb);
346        formatConverter(converter, printSeparator, sb, symbolMap);
347        
348        return sb;
349    }
350    
351    // ---
352    
353    public void label(Unit<?> unit, String label) {
354        throw new UnsupportedOperationException("label() not supported by this implementation");
355    }
356
357    public boolean isLocaleSensitive() {
358        return false;
359    }
360
361    void appendAnnotation(CharSequence symbol, CharSequence annotation, Appendable appendable) throws IOException {
362        appendable.append('{');
363        appendable.append(annotation);
364        appendable.append('}');
365    }
366
367    ///////////////////
368    // Inner classes //
369    ///////////////////
370
371    /**
372     * Variant of unit representation in the UCUM standard
373     * 
374     * @see <a href= "http://unitsofmeasure.org/ucum.html#section-Character-Set-and-Lexical-Rules"> UCUM - Character Set and Lexical Rules</a>
375     */
376    public static enum Variant {
377        CASE_SENSITIVE, CASE_INSENSITIVE, PRINT
378    }
379
380    /**
381     * The Print format is used to output units according to the "print" column in the UCUM standard. Because "print" symbols in UCUM are not unique,
382     * this class of UCUMFormat may not be used for parsing, only for formatting.
383     */
384    private static final class Print extends UCUMFormat {
385
386        /**
387         *
388         */
389        // private static final long serialVersionUID = 2990875526976721414L;
390        private static final SymbolMap PRINT_SYMBOLS = SymbolMap.of(ResourceBundle.getBundle(BUNDLE_BASE + "_Print"));
391        private static final Print DEFAULT = new Print(PRINT_SYMBOLS);
392
393        public Print(SymbolMap symbols) {
394            super(symbols);
395        }
396
397        @Override
398        public Unit<? extends Quantity<?>> parse(CharSequence csq, ParsePosition pos) throws IllegalArgumentException {
399            throw new UnsupportedOperationException("The print format is for pretty-printing of units only. Parsing is not supported.");
400        }
401
402        @Override
403        void appendAnnotation(CharSequence symbol, CharSequence annotation, Appendable appendable) throws IOException {
404            if (symbol != null && symbol.length() > 0) {
405                appendable.append('(');
406                appendable.append(annotation);
407                appendable.append(')');
408            } else {
409                appendable.append(annotation);
410            }
411        }
412
413        @Override
414        public Unit<? extends Quantity<?>> parse(CharSequence csq) throws IllegalArgumentException {
415            return parse(csq, new ParsePosition(0));
416
417        }
418        
419                @Override
420                public String toString() {
421                        return "UCUM Print";
422                }
423    }
424
425    /**
426     * The Parsing format outputs formats and parses units according to the "c/s" or "c/i" column in the UCUM standard, depending on which SymbolMap
427     * is passed to its constructor.
428     */
429    private static final class Parsing extends UCUMFormat {
430        // private static final long serialVersionUID = -922531801940132715L;
431        private static final SymbolMap CASE_SENSITIVE_SYMBOLS = SymbolMap
432                .of(ResourceBundle.getBundle(BUNDLE_BASE + "_CS", new ResourceBundle.Control() {
433                    @Override
434                    public List<Locale> getCandidateLocales(String baseName, Locale locale) {
435                        if (baseName == null)
436                            throw new NullPointerException();
437                        if (locale.equals(new Locale("", "CS"))) {
438                            return Arrays.asList(locale, Locale.ROOT);
439                        }
440                        return super.getCandidateLocales(baseName, locale);
441                    }
442                }));
443        private static final SymbolMap CASE_INSENSITIVE_SYMBOLS = SymbolMap
444                .of(ResourceBundle.getBundle(BUNDLE_BASE + "_CI", new ResourceBundle.Control() {
445                    @Override
446                    public List<Locale> getCandidateLocales(String baseName, Locale locale) {
447                        if (baseName == null)
448                            throw new NullPointerException();
449                        if (locale.equals(new Locale("", "CI"))) {
450                            return Arrays.asList(locale, Locale.ROOT);
451                        } else if (locale.equals(Locale.GERMANY)) {
452                            // TODO why GERMANY?
453                            return Arrays.asList(locale,
454                                    // no Locale.GERMAN here
455                                    Locale.ROOT);
456                        }
457                        return super.getCandidateLocales(baseName, locale);
458                    }
459                }));
460        private static final Parsing DEFAULT_CS = new Parsing(CASE_SENSITIVE_SYMBOLS, true);
461        private static final Parsing DEFAULT_CI = new Parsing(CASE_INSENSITIVE_SYMBOLS, false);
462        private final boolean caseSensitive;
463
464        public Parsing(SymbolMap symbols, boolean caseSensitive) {
465            super(symbols);
466            this.caseSensitive = caseSensitive;
467        }
468
469        @Override
470        public Unit<? extends Quantity<?>> parse(CharSequence csq, ParsePosition cursor) throws MeasurementParseException {
471            // Parsing reads the whole character sequence from the parse position.
472            int start = cursor.getIndex();
473            int end = csq.length();
474            if (end <= start) {
475                return ONE;
476            }
477            String source = csq.subSequence(start, end).toString().trim();
478            if (source.length() == 0) {
479                return ONE;
480            }
481            if (!caseSensitive) {
482                source = source.toUpperCase();
483            }
484            UCUMFormatParser parser = new UCUMFormatParser(symbolMap, new ByteArrayInputStream(source.getBytes()));
485            try {
486                Unit<?> result = parser.parseUnit();
487                cursor.setIndex(end);
488                return result;
489            } catch (TokenException e) {
490                if (e.getToken() != null) {
491                    cursor.setErrorIndex(start + e.getToken().endColumn);
492                } else {
493                    cursor.setErrorIndex(start);
494                }
495                throw new MeasurementParseException(e);
496            } catch (TokenMgrError e) {
497                cursor.setErrorIndex(start);
498                throw new IllegalArgumentException(e.getMessage());
499            }
500        }
501
502        @Override
503        public Unit<? extends Quantity<?>> parse(CharSequence csq) throws MeasurementParseException {
504            return parse(csq, new ParsePosition(0));
505        }
506
507                @Override
508                public String toString() {
509                        return "UCUM Parsing [" +  
510                                        (caseSensitive ? "Case Sensitive" : "Case Insensitive") + 
511                                        "]";
512                }
513    }
514}