001/* 002 * Units of Measurement Systems 003 * Copyright (c) 2005-2021, Jean-Marie Dautelle, Werner Keil and others. 004 * 005 * All rights reserved. 006 * 007 * Redistribution and use in source and binary forms, with or without modification, 008 * are permitted provided that the following conditions are met: 009 * 010 * 1. Redistributions of source code must retain the above copyright notice, 011 * this list of conditions and the following disclaimer. 012 * 013 * 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions 014 * and the following disclaimer in the documentation and/or other materials provided with the distribution. 015 * 016 * 3. Neither the name of JSR-385, Units of Measurement nor the names of their contributors may be used to 017 * endorse or promote products derived from this software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 020 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 021 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 022 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 023 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 026 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 028 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package systems.uom.ucum.format; 031 032import java.io.ByteArrayInputStream; 033import java.io.IOException; 034import java.text.ParsePosition; 035import java.util.Arrays; 036import java.util.LinkedHashMap; 037import java.util.List; 038import java.util.Locale; 039import java.util.Map; 040import java.util.Map.Entry; 041import java.util.ResourceBundle; 042 043import javax.measure.MetricPrefix; 044import javax.measure.Quantity; 045import javax.measure.Unit; 046import javax.measure.UnitConverter; 047import javax.measure.format.MeasurementParseException; 048 049import static systems.uom.ucum.format.UCUMConverterFormatter.formatConverter; 050import static tech.units.indriya.AbstractUnit.ONE; 051 052import si.uom.SI; 053import systems.uom.ucum.format.UCUMFormatHelper.SymbolProvider; 054import systems.uom.ucum.internal.format.UCUMFormatParser; 055import tech.units.indriya.AbstractUnit; 056import tech.units.indriya.format.AbstractUnitFormat; 057import tech.units.indriya.format.SymbolMap; 058import tech.units.indriya.format.TokenException; 059import tech.units.indriya.format.TokenMgrError; 060import tech.units.indriya.function.MultiplyConverter; 061import tech.units.indriya.unit.TransformedUnit; 062 063/** 064 * <p> 065 * This class provides the interface for formatting and parsing {@link Unit units} according to the 066 * <a href="http://unitsofmeasure.org/">Uniform Code for CommonUnits of Measure</a> (UCUM). 067 * </p> 068 * 069 * <p> 070 * For a technical/historical overview of this format please read <a href="http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=61354"> 071 * CommonUnits of Measure in Clinical Information Systems</a>. 072 * </p> 073 * 074 * <p> 075 * As of revision 1.16, the BNF in the UCUM standard contains an <a href="http://unitsofmeasure.org/ticket/4">error</a>. I've attempted to work around 076 * the problem by modifying the BNF productions for <Term>. Once the error in the standard is corrected, it may be necessary to modify the 077 * productions in the UCUMFormatParser.jj file to conform to the standard. 078 * </p> 079 * 080 * @author <a href="mailto:eric-r@northwestern.edu">Eric Russell</a> 081 * @author <a href="mailto:werner@uom.systems">Werner Keil</a> 082 * @author Andi Huber 083 * @version 2.3, 7 December 2020 084 */ 085public abstract class UCUMFormat extends AbstractUnitFormat { 086 /** 087 * 088 */ 089 // private static final long serialVersionUID = 8586656823290135155L; 090 091 // A helper to declare bundle names for all instances 092 private static final String BUNDLE_BASE = UCUMFormat.class.getName(); 093 094 // ///////////////// 095 // Class methods // 096 // ///////////////// 097 098 /** 099 * Returns the instance for formatting/parsing using the given variant 100 * 101 * @param variant 102 * the <strong>UCUM</strong> variant to use 103 * @return a {@link UCUMFormat} instance 104 */ 105 public static UCUMFormat getInstance(Variant variant) { 106 switch (variant) { 107 case CASE_INSENSITIVE: 108 return Parsing.DEFAULT_CI; 109 case CASE_SENSITIVE: 110 return Parsing.DEFAULT_CS; 111 case PRINT: 112 return Print.DEFAULT; 113 default: 114 throw new IllegalArgumentException("Unknown variant: " + variant); 115 } 116 } 117 118 /** 119 * Returns an instance for formatting and parsing using user defined symbols 120 * 121 * @param variant 122 * the <strong>UCUM</strong> variant to use 123 * @param symbolMap 124 * the map of user defined symbols to use 125 * @return a {@link UCUMFormat} instance 126 */ 127 public static UCUMFormat getInstance(Variant variant, SymbolMap symbolMap) { 128 switch (variant) { 129 case CASE_INSENSITIVE: 130 return new Parsing(symbolMap, false); 131 case CASE_SENSITIVE: 132 return new Parsing(symbolMap, true); 133 case PRINT: 134 return new Print(symbolMap); 135 default: 136 throw new IllegalArgumentException("Unknown variant: " + variant); 137 } 138 } 139 140 /** 141 * The symbol map used by this instance to map between {@link AbstractUnit Unit}s and <code>String</code>s. 142 */ 143 final SymbolMap symbolMap; 144 145 /** 146 * Get the symbol map used by this instance to map between {@link AbstractUnit Unit}s and <code>String</code>s, etc... 147 * 148 * @return SymbolMap the current symbol map 149 */ 150 protected SymbolMap getSymbols() { 151 return symbolMap; 152 } 153 154 ////////////////// 155 // Constructors // 156 ////////////////// 157 /** 158 * Base constructor. 159 */ 160 UCUMFormat(SymbolMap symbolMap) { 161 this.symbolMap = symbolMap; 162 } 163 164 ///////////// 165 // Parsing // 166 ///////////// 167 public abstract Unit<? extends Quantity<?>> parse(CharSequence csq, ParsePosition cursor) throws MeasurementParseException; 168 169 protected Unit<?> parse(CharSequence csq, int index) throws MeasurementParseException { 170 return parse(csq, new ParsePosition(index)); 171 } 172 173 @Override 174 public abstract Unit<? extends Quantity<?>> parse(CharSequence csq) throws MeasurementParseException; 175 176 //////////////// 177 // Formatting // 178 //////////////// 179 @SuppressWarnings({ "rawtypes" }) 180 public Appendable format(final Unit<?> unknownUnit, Appendable appendable) throws IOException { 181 182 if (!(unknownUnit instanceof AbstractUnit)) { 183 throw new UnsupportedOperationException("The UCUM format supports only known units (Comparable units)"); 184 } 185 186 final AbstractUnit unit = (AbstractUnit) unknownUnit; 187 final UCUMFormatHelper formatHelper = UCUMFormatHelper.of(this, unit); 188 final CharSequence symbol = formatHelper.findSymbolFor(symbolProviders, unit); 189 190 if (symbol == null) { 191 throw new IllegalArgumentException("Cannot format the given Object as UCUM units (unsupported unit " + unit.getClass().getName() + "). " 192 + "Custom units types should override the toString() method as the default implementation uses the UCUM format."); 193 } 194 195 appendable.append(symbol); 196 formatHelper.appendAnnotation(symbol, appendable); 197 198 return appendable; 199 } 200 201 // -- SYMBOL PROVIDERS 202 203 /* processed in order of declaration, the first to return a non-null string wins */ 204 private final SymbolProvider[] symbolProviders = { 205 this::symbolFromLookupMap, 206 this::symbolForTransformedUnit, 207 this::symbolForKilogram, 208 this::symbolForProductUnits, 209 this::symbolForNonSystemUnit, 210 this::symbolFromField, 211 }; 212 213 private CharSequence symbolFromLookupMap(AbstractUnit<?> unit) throws IOException { 214 return symbolMap.getSymbol(unit); 215 } 216 217 private CharSequence symbolFromField(AbstractUnit<?> unit) throws IOException { 218 return unit.getSymbol(); 219 } 220 221 @SuppressWarnings({ "unchecked", "rawtypes" }) 222 private CharSequence symbolForTransformedUnit(AbstractUnit unit) throws IOException { 223 if (!(unit instanceof TransformedUnit)) { 224 return null; 225 } 226 final StringBuilder sb = new StringBuilder(); 227 final Unit<?> parentUnit = ((TransformedUnit) unit).getParentUnit(); 228 final UnitConverter converter = 229 UCUMFormatHelper.toKnownPrefixConverterIfPossible(unit.getConverterTo(parentUnit)); 230 final boolean printSeparator = !ONE.equals(parentUnit); 231 232 if (printSeparator && converter instanceof MultiplyConverter) { // workaround for #166 233 format(parentUnit, sb); 234 } 235 formatConverter(converter, printSeparator, sb, symbolMap); 236 237 return sb; 238 } 239 240 @SuppressWarnings({ "unchecked", "rawtypes" }) 241 private CharSequence symbolForProductUnits(AbstractUnit unit) throws IOException { 242 final Map<? extends AbstractUnit<?>, Integer> productUnits = unit.getBaseUnits(); 243 244 if (productUnits == null) { 245 return null; 246 } 247 248 final StringBuilder sb = new StringBuilder(); 249 final Map<AbstractUnit<?>, Integer> numeratorUnits = new LinkedHashMap<>(); 250 final Map<AbstractUnit<?>, Integer> denominatorUnits = new LinkedHashMap<>(); 251 252 // divide units into numerators and denominators 253 for (Entry<? extends AbstractUnit<?>, Integer> u : productUnits.entrySet()) { 254 if (u.getValue() > 0) { 255 numeratorUnits.put(u.getKey(), u.getValue()); 256 }else { 257 denominatorUnits.put(u.getKey(), u.getValue()); 258 } 259 } 260 261 int numeratorCount = 1; 262 for (Entry<? extends AbstractUnit<?>, Integer> u : numeratorUnits.entrySet()) { 263 // add multiplication separators after first unit 264 if (numeratorCount > 1){ 265 sb.append("."); 266 } 267 // add individual unit string 268 format(u.getKey(),sb); 269 // add power number if greater than 1 270 if (u.getValue() > 1){ 271 sb.append(u.getValue()); 272 } 273 numeratorCount++; 274 } 275 // special case if there is no numerator append one for inverse 276 if (numeratorCount == 1) { 277 sb.append("1"); 278 } 279 if (denominatorUnits.size() > 0){ 280 // append division symbol 281 sb.append("/"); 282 int denominatorCount = 1; 283 for (Entry<? extends AbstractUnit<?>, Integer> u : denominatorUnits.entrySet()) { 284 // if there is more than one denominator unit and this is the first, add open parenthesis 285 if (denominatorCount == 1 && denominatorUnits.size() > 1 ) { 286 sb.append("("); 287 } 288 // add multiplication separators after first unit 289 if (denominatorCount > 1){ 290 sb.append("."); 291 } 292 // add individual unit string 293 format(u.getKey(),sb); 294 // add power number if abs greater than 1 295 if (u.getValue() < -1){ 296 sb.append(-u.getValue()); 297 } 298 // if there is more than one denominator unit and this is the last, add close parenthesis 299 if (denominatorCount == denominatorUnits.size() && denominatorUnits.size() > 1 ) { 300 sb.append(")"); 301 } 302 denominatorCount++; 303 } 304 } 305 return sb; 306 } 307 308 @SuppressWarnings({ "unchecked", "rawtypes" }) 309 private CharSequence symbolForKilogram(AbstractUnit unit) throws IOException { 310 311 final Unit<?> systemUnit = unit.getSystemUnit(); 312 if (!systemUnit.equals(SI.KILOGRAM)) { 313 return null; 314 } 315 316 final UnitConverter converter = 317 UCUMFormatHelper.toKnownPrefixConverterIfPossible( 318 unit.getConverterTo(systemUnit) 319 .concatenate(MultiplyConverter.ofPrefix(MetricPrefix.KILO))); 320 321 final StringBuilder sb = new StringBuilder(); 322 final boolean printSeparator = true; 323 324 // A special case because KILOGRAM is a BaseUnit instead of 325 // a transformed unit, for compatibility with existing SI 326 // unit system. 327 format(SI.GRAM, sb); 328 formatConverter(converter, printSeparator, sb, symbolMap); 329 330 return sb; 331 } 332 333 @SuppressWarnings({ "unchecked", "rawtypes" }) 334 private CharSequence symbolForNonSystemUnit(AbstractUnit unit) throws IOException { 335 336 if (unit.isSystemUnit()) { 337 return null; 338 } 339 340 final Unit<?> parentUnit = unit.getSystemUnit(); 341 final UnitConverter converter = unit.getConverterTo(parentUnit); 342 final StringBuilder sb = new StringBuilder(); 343 final boolean printSeparator = !parentUnit.equals(ONE); 344 345 format(parentUnit, sb); 346 formatConverter(converter, printSeparator, sb, symbolMap); 347 348 return sb; 349 } 350 351 // --- 352 353 public void label(Unit<?> unit, String label) { 354 throw new UnsupportedOperationException("label() not supported by this implementation"); 355 } 356 357 public boolean isLocaleSensitive() { 358 return false; 359 } 360 361 void appendAnnotation(CharSequence symbol, CharSequence annotation, Appendable appendable) throws IOException { 362 appendable.append('{'); 363 appendable.append(annotation); 364 appendable.append('}'); 365 } 366 367 /////////////////// 368 // Inner classes // 369 /////////////////// 370 371 /** 372 * Variant of unit representation in the UCUM standard 373 * 374 * @see <a href= "http://unitsofmeasure.org/ucum.html#section-Character-Set-and-Lexical-Rules"> UCUM - Character Set and Lexical Rules</a> 375 */ 376 public static enum Variant { 377 CASE_SENSITIVE, CASE_INSENSITIVE, PRINT 378 } 379 380 /** 381 * The Print format is used to output units according to the "print" column in the UCUM standard. Because "print" symbols in UCUM are not unique, 382 * this class of UCUMFormat may not be used for parsing, only for formatting. 383 */ 384 private static final class Print extends UCUMFormat { 385 386 /** 387 * 388 */ 389 // private static final long serialVersionUID = 2990875526976721414L; 390 private static final SymbolMap PRINT_SYMBOLS = SymbolMap.of(ResourceBundle.getBundle(BUNDLE_BASE + "_Print")); 391 private static final Print DEFAULT = new Print(PRINT_SYMBOLS); 392 393 public Print(SymbolMap symbols) { 394 super(symbols); 395 } 396 397 @Override 398 public Unit<? extends Quantity<?>> parse(CharSequence csq, ParsePosition pos) throws IllegalArgumentException { 399 throw new UnsupportedOperationException("The print format is for pretty-printing of units only. Parsing is not supported."); 400 } 401 402 @Override 403 void appendAnnotation(CharSequence symbol, CharSequence annotation, Appendable appendable) throws IOException { 404 if (symbol != null && symbol.length() > 0) { 405 appendable.append('('); 406 appendable.append(annotation); 407 appendable.append(')'); 408 } else { 409 appendable.append(annotation); 410 } 411 } 412 413 @Override 414 public Unit<? extends Quantity<?>> parse(CharSequence csq) throws IllegalArgumentException { 415 return parse(csq, new ParsePosition(0)); 416 417 } 418 419 @Override 420 public String toString() { 421 return "UCUM Print"; 422 } 423 } 424 425 /** 426 * The Parsing format outputs formats and parses units according to the "c/s" or "c/i" column in the UCUM standard, depending on which SymbolMap 427 * is passed to its constructor. 428 */ 429 private static final class Parsing extends UCUMFormat { 430 // private static final long serialVersionUID = -922531801940132715L; 431 private static final SymbolMap CASE_SENSITIVE_SYMBOLS = SymbolMap 432 .of(ResourceBundle.getBundle(BUNDLE_BASE + "_CS", new ResourceBundle.Control() { 433 @Override 434 public List<Locale> getCandidateLocales(String baseName, Locale locale) { 435 if (baseName == null) 436 throw new NullPointerException(); 437 if (locale.equals(new Locale("", "CS"))) { 438 return Arrays.asList(locale, Locale.ROOT); 439 } 440 return super.getCandidateLocales(baseName, locale); 441 } 442 })); 443 private static final SymbolMap CASE_INSENSITIVE_SYMBOLS = SymbolMap 444 .of(ResourceBundle.getBundle(BUNDLE_BASE + "_CI", new ResourceBundle.Control() { 445 @Override 446 public List<Locale> getCandidateLocales(String baseName, Locale locale) { 447 if (baseName == null) 448 throw new NullPointerException(); 449 if (locale.equals(new Locale("", "CI"))) { 450 return Arrays.asList(locale, Locale.ROOT); 451 } else if (locale.equals(Locale.GERMANY)) { 452 // TODO why GERMANY? 453 return Arrays.asList(locale, 454 // no Locale.GERMAN here 455 Locale.ROOT); 456 } 457 return super.getCandidateLocales(baseName, locale); 458 } 459 })); 460 private static final Parsing DEFAULT_CS = new Parsing(CASE_SENSITIVE_SYMBOLS, true); 461 private static final Parsing DEFAULT_CI = new Parsing(CASE_INSENSITIVE_SYMBOLS, false); 462 private final boolean caseSensitive; 463 464 public Parsing(SymbolMap symbols, boolean caseSensitive) { 465 super(symbols); 466 this.caseSensitive = caseSensitive; 467 } 468 469 @Override 470 public Unit<? extends Quantity<?>> parse(CharSequence csq, ParsePosition cursor) throws MeasurementParseException { 471 // Parsing reads the whole character sequence from the parse position. 472 int start = cursor.getIndex(); 473 int end = csq.length(); 474 if (end <= start) { 475 return ONE; 476 } 477 String source = csq.subSequence(start, end).toString().trim(); 478 if (source.length() == 0) { 479 return ONE; 480 } 481 if (!caseSensitive) { 482 source = source.toUpperCase(); 483 } 484 UCUMFormatParser parser = new UCUMFormatParser(symbolMap, new ByteArrayInputStream(source.getBytes())); 485 try { 486 Unit<?> result = parser.parseUnit(); 487 cursor.setIndex(end); 488 return result; 489 } catch (TokenException e) { 490 if (e.getToken() != null) { 491 cursor.setErrorIndex(start + e.getToken().endColumn); 492 } else { 493 cursor.setErrorIndex(start); 494 } 495 throw new MeasurementParseException(e); 496 } catch (TokenMgrError e) { 497 cursor.setErrorIndex(start); 498 throw new IllegalArgumentException(e.getMessage()); 499 } 500 } 501 502 @Override 503 public Unit<? extends Quantity<?>> parse(CharSequence csq) throws MeasurementParseException { 504 return parse(csq, new ParsePosition(0)); 505 } 506 507 @Override 508 public String toString() { 509 return "UCUM Parsing [" + 510 (caseSensitive ? "Case Sensitive" : "Case Insensitive") + 511 "]"; 512 } 513 } 514}