001// License: GPL. For details, see LICENSE file. 002package org.openstreetmap.josm.data.validation.tests; 003 004import static org.openstreetmap.josm.tools.I18n.marktr; 005import static org.openstreetmap.josm.tools.I18n.tr; 006 007import java.awt.GridBagConstraints; 008import java.awt.event.ActionListener; 009import java.io.BufferedReader; 010import java.io.IOException; 011import java.lang.Character.UnicodeBlock; 012import java.util.ArrayList; 013import java.util.Arrays; 014import java.util.Collection; 015import java.util.Collections; 016import java.util.HashMap; 017import java.util.HashSet; 018import java.util.List; 019import java.util.Locale; 020import java.util.Map; 021import java.util.Map.Entry; 022import java.util.Set; 023import java.util.regex.Pattern; 024 025import javax.swing.JCheckBox; 026import javax.swing.JLabel; 027import javax.swing.JPanel; 028 029import org.openstreetmap.josm.command.ChangePropertyCommand; 030import org.openstreetmap.josm.command.ChangePropertyKeyCommand; 031import org.openstreetmap.josm.command.Command; 032import org.openstreetmap.josm.command.SequenceCommand; 033import org.openstreetmap.josm.data.osm.AbstractPrimitive; 034import org.openstreetmap.josm.data.osm.OsmPrimitive; 035import org.openstreetmap.josm.data.osm.Tag; 036import org.openstreetmap.josm.data.osm.Tagged; 037import org.openstreetmap.josm.data.preferences.sources.ValidatorPrefHelper; 038import org.openstreetmap.josm.data.validation.Severity; 039import org.openstreetmap.josm.data.validation.Test.TagTest; 040import org.openstreetmap.josm.data.validation.TestError; 041import org.openstreetmap.josm.data.validation.util.Entities; 042import org.openstreetmap.josm.gui.progress.ProgressMonitor; 043import org.openstreetmap.josm.gui.tagging.presets.TaggingPreset; 044import org.openstreetmap.josm.gui.tagging.presets.TaggingPresetItem; 045import org.openstreetmap.josm.gui.tagging.presets.TaggingPresets; 046import org.openstreetmap.josm.gui.tagging.presets.items.Check; 047import 
org.openstreetmap.josm.gui.tagging.presets.items.CheckGroup; 048import org.openstreetmap.josm.gui.tagging.presets.items.KeyedItem; 049import org.openstreetmap.josm.gui.widgets.EditableList; 050import org.openstreetmap.josm.io.CachedFile; 051import org.openstreetmap.josm.spi.preferences.Config; 052import org.openstreetmap.josm.tools.GBC; 053import org.openstreetmap.josm.tools.Logging; 054import org.openstreetmap.josm.tools.MultiMap; 055import org.openstreetmap.josm.tools.Utils; 056 057/** 058 * Check for misspelled or wrong tags 059 * 060 * @author frsantos 061 * @since 3669 062 */ 063public class TagChecker extends TagTest { 064 065 /** The config file of ignored tags */ 066 public static final String IGNORE_FILE = "resource://data/validator/ignoretags.cfg"; 067 /** The config file of dictionary words */ 068 public static final String SPELL_FILE = "resource://data/validator/words.cfg"; 069 070 /** Normalized keys: the key should be substituted by the value if the key was not found in presets */ 071 private static final Map<String, String> harmonizedKeys = new HashMap<>(); 072 /** The spell check preset values which are not stored in TaggingPresets */ 073 private static volatile HashSet<String> additionalPresetsValueData; 074 /** often used tags which are not in presets */ 075 private static volatile MultiMap<String, String> oftenUsedTags = new MultiMap<>(); 076 077 private static final Pattern UNWANTED_NON_PRINTING_CONTROL_CHARACTERS = Pattern.compile( 078 "[\\x00-\\x09\\x0B\\x0C\\x0E-\\x1F\\x7F\\u200e-\\u200f\\u202a-\\u202e]"); 079 080 /** The TagChecker data */ 081 private static final List<String> ignoreDataStartsWith = new ArrayList<>(); 082 private static final Set<String> ignoreDataEquals = new HashSet<>(); 083 private static final List<String> ignoreDataEndsWith = new ArrayList<>(); 084 private static final List<Tag> ignoreDataTag = new ArrayList<>(); 085 /** tag keys that have only numerical values in the presets */ 086 private static final Set<String> 
ignoreForLevenshtein = new HashSet<>(); 087 088 /** The preferences prefix */ 089 protected static final String PREFIX = ValidatorPrefHelper.PREFIX + "." + TagChecker.class.getSimpleName(); 090 091 /** 092 * The preference key to check values 093 */ 094 public static final String PREF_CHECK_VALUES = PREFIX + ".checkValues"; 095 /** 096 * The preference key to check keys 097 */ 098 public static final String PREF_CHECK_KEYS = PREFIX + ".checkKeys"; 099 /** 100 * The preference key to enable complex checks 101 */ 102 public static final String PREF_CHECK_COMPLEX = PREFIX + ".checkComplex"; 103 /** 104 * The preference key to search for fixme tags 105 */ 106 public static final String PREF_CHECK_FIXMES = PREFIX + ".checkFixmes"; 107 108 /** 109 * The preference key for source files 110 * @see #DEFAULT_SOURCES 111 */ 112 public static final String PREF_SOURCES = PREFIX + ".source"; 113 114 private static final String BEFORE_UPLOAD = "BeforeUpload"; 115 /** 116 * The preference key to check keys - used before upload 117 */ 118 public static final String PREF_CHECK_KEYS_BEFORE_UPLOAD = PREF_CHECK_KEYS + BEFORE_UPLOAD; 119 /** 120 * The preference key to check values - used before upload 121 */ 122 public static final String PREF_CHECK_VALUES_BEFORE_UPLOAD = PREF_CHECK_VALUES + BEFORE_UPLOAD; 123 /** 124 * The preference key to run complex tests - used before upload 125 */ 126 public static final String PREF_CHECK_COMPLEX_BEFORE_UPLOAD = PREF_CHECK_COMPLEX + BEFORE_UPLOAD; 127 /** 128 * The preference key to search for fixmes - used before upload 129 */ 130 public static final String PREF_CHECK_FIXMES_BEFORE_UPLOAD = PREF_CHECK_FIXMES + BEFORE_UPLOAD; 131 132 private static final int MAX_LEVENSHTEIN_DISTANCE = 2; 133 134 protected boolean checkKeys; 135 protected boolean checkValues; 136 /** Was used for special configuration file, might be used to disable value spell checker. 
*/
    protected boolean checkComplex;
    /** Whether fixme tags are reported; loaded in {@code startTest} from {@link #PREF_CHECK_FIXMES}. */
    protected boolean checkFixmes;

    /** Preferences check box for {@link #PREF_CHECK_KEYS}; created in {@code addGui}. */
    protected JCheckBox prefCheckKeys;
    /** Preferences check box for {@link #PREF_CHECK_VALUES}; created in {@code addGui}. */
    protected JCheckBox prefCheckValues;
    /** Preferences check box for {@link #PREF_CHECK_COMPLEX}; created in {@code addGui}. */
    protected JCheckBox prefCheckComplex;
    /** Preferences check box for {@link #PREF_CHECK_FIXMES}; created in {@code addGui}. */
    protected JCheckBox prefCheckFixmes;
    /** NOTE(review): not assigned anywhere in the visible part of this class - presumably a leftover of the removed PAINT check. */
    protected JCheckBox prefCheckPaint;

    /** Preferences check box for {@link #PREF_CHECK_KEYS_BEFORE_UPLOAD}; created in {@code addGui}. */
    protected JCheckBox prefCheckKeysBeforeUpload;
    /** Preferences check box for {@link #PREF_CHECK_VALUES_BEFORE_UPLOAD}; created in {@code addGui}. */
    protected JCheckBox prefCheckValuesBeforeUpload;
    /** Preferences check box for {@link #PREF_CHECK_COMPLEX_BEFORE_UPLOAD}; created in {@code addGui}. */
    protected JCheckBox prefCheckComplexBeforeUpload;
    /** Preferences check box for {@link #PREF_CHECK_FIXMES_BEFORE_UPLOAD}; created in {@code addGui}. */
    protected JCheckBox prefCheckFixmesBeforeUpload;
    /** NOTE(review): not assigned anywhere in the visible part of this class - presumably a leftover of the removed PAINT check. */
    protected JCheckBox prefCheckPaintBeforeUpload;

    // Error codes handed to TestError.builder(...) by the individual checks of this test.
    // CHECKSTYLE.OFF: SingleSpaceSeparator
    protected static final int EMPTY_VALUES = 1200;
    protected static final int INVALID_KEY = 1201;
    protected static final int INVALID_VALUE = 1202;
    protected static final int FIXME = 1203;
    protected static final int INVALID_SPACE = 1204;
    protected static final int INVALID_KEY_SPACE = 1205;
    protected static final int INVALID_HTML = 1206; /* 1207 was PAINT */
    protected static final int LONG_VALUE = 1208;
    protected static final int LONG_KEY = 1209;
    protected static final int LOW_CHAR_VALUE = 1210;
    protected static final int LOW_CHAR_KEY = 1211;
    protected static final int MISSPELLED_VALUE = 1212;
    protected static final int MISSPELLED_KEY = 1213;
    protected static final int MULTIPLE_SPACES = 1214;
    protected static final int MISSPELLED_VALUE_NO_FIX = 1215;
    protected static final int UNUSUAL_UNICODE_CHAR_VALUE = 1216;
    // CHECKSTYLE.ON: SingleSpaceSeparator

    /** Editable list of data source locations shown in the preferences panel; created in {@code addGui}. */
    protected EditableList sourcesList;

    /** Sources used when the {@link #PREF_SOURCES} preference is not set: the bundled ignore file and word list. */
    private static final List<String> DEFAULT_SOURCES = Arrays.asList(IGNORE_FILE, SPELL_FILE);

    /**
     * Constructor
     */
    public TagChecker() {
        super(tr("Tag checker"), tr("This test checks for errors in tag keys and values."));
    }

    /**
     * Loads the static checker data: first the configured data sources, then the preset values,
     * and finally the analysis of the presets (which relies on the ignore lists read in the first step).
     * @throws IOException if a data source could not be read
     */
    @Override
    public void initialize() throws IOException {
        initializeData();
        initializePresets();
        analysePresets();
    }

    /**
     * Add presets
that contain only numerical values to the ignore list 191 */ 192 private static void analysePresets() { 193 for (String key : TaggingPresets.getPresetKeys()) { 194 if (isKeyIgnored(key)) 195 continue; 196 boolean allNumerical = true; 197 Set<String> values = TaggingPresets.getPresetValues(key); 198 if (values.isEmpty()) 199 allNumerical = false; 200 for (String val : values) { 201 if (!isNum(val)) { 202 allNumerical = false; 203 break; 204 } 205 } 206 if (allNumerical) { 207 ignoreForLevenshtein.add(key); 208 } 209 } 210 } 211 212 /** 213 * Reads the spell-check file into a HashMap. 214 * The data file is a list of words, beginning with +/-. If it starts with +, 215 * the word is valid, but if it starts with -, the word should be replaced 216 * by the nearest + word before this. 217 * 218 * @throws IOException if any I/O error occurs 219 */ 220 private static void initializeData() throws IOException { 221 ignoreDataStartsWith.clear(); 222 ignoreDataEquals.clear(); 223 ignoreDataEndsWith.clear(); 224 ignoreDataTag.clear(); 225 harmonizedKeys.clear(); 226 ignoreForLevenshtein.clear(); 227 oftenUsedTags.clear(); 228 229 StringBuilder errorSources = new StringBuilder(); 230 for (String source : Config.getPref().getList(PREF_SOURCES, DEFAULT_SOURCES)) { 231 try ( 232 CachedFile cf = new CachedFile(source); 233 BufferedReader reader = cf.getContentReader() 234 ) { 235 String okValue = null; 236 boolean tagcheckerfile = false; 237 boolean ignorefile = false; 238 boolean isFirstLine = true; 239 String line; 240 while ((line = reader.readLine()) != null) { 241 if (line.isEmpty()) { 242 // ignore 243 } else if (line.startsWith("#")) { 244 if (line.startsWith("# JOSM TagChecker")) { 245 tagcheckerfile = true; 246 Logging.error(tr("Ignoring {0}. 
Support was dropped", source)); 247 } else 248 if (line.startsWith("# JOSM IgnoreTags")) { 249 ignorefile = true; 250 if (!DEFAULT_SOURCES.contains(source)) { 251 Logging.info(tr("Adding {0} to ignore tags", source)); 252 } 253 } 254 } else if (ignorefile) { 255 parseIgnoreFileLine(source, line); 256 } else if (tagcheckerfile) { 257 // ignore 258 } else if (line.charAt(0) == '+') { 259 okValue = line.substring(1); 260 } else if (line.charAt(0) == '-' && okValue != null) { 261 String hk = harmonizeKey(line.substring(1)); 262 if (!okValue.equals(hk) && harmonizedKeys.put(hk, okValue) != null) { 263 Logging.debug(tr("Line was ignored: {0}", line)); 264 } 265 } else { 266 Logging.error(tr("Invalid spellcheck line: {0}", line)); 267 } 268 if (isFirstLine) { 269 isFirstLine = false; 270 if (!(tagcheckerfile || ignorefile) && !DEFAULT_SOURCES.contains(source)) { 271 Logging.info(tr("Adding {0} to spellchecker", source)); 272 } 273 } 274 } 275 } catch (IOException e) { 276 Logging.error(e); 277 errorSources.append(source).append('\n'); 278 } 279 } 280 281 if (errorSources.length() > 0) 282 throw new IOException(tr("Could not access data file(s):\n{0}", errorSources)); 283 } 284 285 /** 286 * Parse a line found in a configuration file 287 * @param source name of configuration file 288 * @param line the line to parse 289 */ 290 private static void parseIgnoreFileLine(String source, String line) { 291 line = line.trim(); 292 if (line.length() < 4) { 293 return; 294 } 295 try { 296 String key = line.substring(0, 2); 297 line = line.substring(2); 298 299 switch (key) { 300 case "S:": 301 ignoreDataStartsWith.add(line); 302 break; 303 case "E:": 304 ignoreDataEquals.add(line); 305 addToKeyDictionary(line); 306 break; 307 case "F:": 308 ignoreDataEndsWith.add(line); 309 break; 310 case "K:": 311 Tag tag = Tag.ofString(line); 312 ignoreDataTag.add(tag); 313 oftenUsedTags.put(tag.getKey(), tag.getValue()); 314 addToKeyDictionary(tag.getKey()); 315 break; 316 default: 317 if 
(!key.startsWith(";")) { 318 Logging.warn("Unsupported TagChecker key: " + key); 319 } 320 } 321 } catch (IllegalArgumentException e) { 322 Logging.error("Invalid line in {0} : {1}", source, e.getMessage()); 323 Logging.trace(e); 324 } 325 } 326 327 private static void addToKeyDictionary(String key) { 328 if (key != null) { 329 String hk = harmonizeKey(key); 330 if (!key.equals(hk)) { 331 harmonizedKeys.put(hk, key); 332 } 333 } 334 } 335 336 /** 337 * Reads the presets data. 338 * 339 */ 340 public static void initializePresets() { 341 342 if (!Config.getPref().getBoolean(PREF_CHECK_VALUES, true)) 343 return; 344 345 Collection<TaggingPreset> presets = TaggingPresets.getTaggingPresets(); 346 if (!presets.isEmpty()) { 347 initAdditionalPresetsValueData(); 348 for (TaggingPreset p : presets) { 349 for (TaggingPresetItem i : p.data) { 350 if (i instanceof KeyedItem) { 351 addPresetValue((KeyedItem) i); 352 } else if (i instanceof CheckGroup) { 353 for (Check c : ((CheckGroup) i).checks) { 354 addPresetValue(c); 355 } 356 } 357 } 358 } 359 } 360 } 361 362 private static void initAdditionalPresetsValueData() { 363 additionalPresetsValueData = new HashSet<>(); 364 for (String a : AbstractPrimitive.getUninterestingKeys()) { 365 additionalPresetsValueData.add(a); 366 } 367 for (String a : Config.getPref().getList(ValidatorPrefHelper.PREFIX + ".knownkeys", 368 Arrays.asList("is_in", "int_ref", "fixme", "population"))) { 369 additionalPresetsValueData.add(a); 370 } 371 } 372 373 private static void addPresetValue(KeyedItem ky) { 374 if (ky.key != null && ky.getValues() != null) { 375 addToKeyDictionary(ky.key); 376 } 377 } 378 379 /** 380 * Checks given string (key or value) if it contains unwanted non-printing control characters (either ASCII or Unicode bidi characters) 381 * @param s string to check 382 * @return {@code true} if {@code s} contains non-printing control characters 383 */ 384 static boolean containsUnwantedNonPrintingControlCharacter(String s) { 385 return s 
!= null && !s.isEmpty() && ( 386 isJoiningChar(s.charAt(0)) || 387 isJoiningChar(s.charAt(s.length() - 1)) || 388 s.chars().anyMatch(c -> (isAsciiControlChar(c) && !isNewLineChar(c)) || isBidiControlChar(c)) 389 ); 390 } 391 392 private static boolean isAsciiControlChar(int c) { 393 return c < 0x20 || c == 0x7F; 394 } 395 396 private static boolean isNewLineChar(int c) { 397 return c == 0x0a || c == 0x0d; 398 } 399 400 private static boolean isJoiningChar(int c) { 401 return c == 0x200c || c == 0x200d; // ZWNJ, ZWJ 402 } 403 404 private static boolean isBidiControlChar(int c) { 405 /* check for range 0x200e to 0x200f (LRM, RLM) or 406 0x202a to 0x202e (LRE, RLE, PDF, LRO, RLO) */ 407 return (c >= 0x200e && c <= 0x200f) || (c >= 0x202a && c <= 0x202e); 408 } 409 410 static String removeUnwantedNonPrintingControlCharacters(String s) { 411 // Remove all unwanted characters 412 String result = UNWANTED_NON_PRINTING_CONTROL_CHARACTERS.matcher(s).replaceAll(""); 413 // Remove joining characters located at the beginning of the string 414 while (!result.isEmpty() && isJoiningChar(result.charAt(0))) { 415 result = result.substring(1); 416 } 417 // Remove joining characters located at the end of the string 418 while (!result.isEmpty() && isJoiningChar(result.charAt(result.length() - 1))) { 419 result = result.substring(0, result.length() - 1); 420 } 421 return result; 422 } 423 424 static boolean containsUnusualUnicodeCharacter(String key, String value) { 425 return value != null && value.chars().anyMatch(c -> isUnusualUnicodeBlock(key, UnicodeBlock.of(c))); 426 } 427 428 /** 429 * Detects highly suspicious Unicode characters that have been seen in OSM database. 
430 * @param key tag key 431 * @param b Unicode block of the current character 432 * @return {@code true} if the current unicode block is very unusual for the given key 433 */ 434 private static boolean isUnusualUnicodeBlock(String key, UnicodeBlock b) { 435 return isUnusualPhoneticUse(key, b) || isUnusualBmpUse(b) || isUnusualSmpUse(b); 436 } 437 438 private static boolean isUnusualPhoneticUse(String key, UnicodeBlock b) { 439 return (b == UnicodeBlock.IPA_EXTENSIONS // U+0250..U+02AF 440 || b == UnicodeBlock.PHONETIC_EXTENSIONS // U+1D00..U+1D7F 441 || b == UnicodeBlock.PHONETIC_EXTENSIONS_SUPPLEMENT) // U+1D80..U+1DBF 442 && !key.endsWith(":pronunciation"); 443 } 444 445 private static boolean isUnusualBmpUse(UnicodeBlock b) { 446 // CHECKSTYLE.OFF: BooleanExpressionComplexity 447 return b == UnicodeBlock.COMBINING_MARKS_FOR_SYMBOLS // U+20D0..U+20FF 448 || b == UnicodeBlock.MATHEMATICAL_OPERATORS // U+2200..U+22FF 449 || b == UnicodeBlock.ENCLOSED_ALPHANUMERICS // U+2460..U+24FF 450 || b == UnicodeBlock.BOX_DRAWING // U+2500..U+257F 451 || b == UnicodeBlock.GEOMETRIC_SHAPES // U+25A0..U+25FF 452 || b == UnicodeBlock.DINGBATS // U+2700..U+27BF 453 || b == UnicodeBlock.MISCELLANEOUS_SYMBOLS_AND_ARROWS // U+2B00..U+2BFF 454 || b == UnicodeBlock.GLAGOLITIC // U+2C00..U+2C5F 455 || b == UnicodeBlock.HANGUL_COMPATIBILITY_JAMO // U+3130..U+318F 456 || b == UnicodeBlock.ENCLOSED_CJK_LETTERS_AND_MONTHS // U+3200..U+32FF 457 || b == UnicodeBlock.LATIN_EXTENDED_D // U+A720..U+A7FF 458 || b == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS // U+F900..U+FAFF 459 || b == UnicodeBlock.ALPHABETIC_PRESENTATION_FORMS // U+FB00..U+FB4F 460 || b == UnicodeBlock.VARIATION_SELECTORS // U+FE00..U+FE0F 461 || b == UnicodeBlock.SPECIALS; // U+FFF0..U+FFFF 462 // CHECKSTYLE.ON: BooleanExpressionComplexity 463 } 464 465 private static boolean isUnusualSmpUse(UnicodeBlock b) { 466 // UnicodeBlock.SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS is only defined in Java 9+ 467 return b == 
UnicodeBlock.MUSICAL_SYMBOLS // U+1D100..U+1D1FF 468 || b == UnicodeBlock.ENCLOSED_ALPHANUMERIC_SUPPLEMENT // U+1F100..U+1F1FF 469 || b == UnicodeBlock.EMOTICONS // U+1F600..U+1F64F 470 || b == UnicodeBlock.TRANSPORT_AND_MAP_SYMBOLS; // U+1F680..U+1F6FF 471 } 472 473 /** 474 * Get set of preset values for the given key. 475 * @param key the key 476 * @return null if key is not in presets or in additionalPresetsValueData, 477 * else a set which might be empty. 478 */ 479 private static Set<String> getPresetValues(String key) { 480 Set<String> res = TaggingPresets.getPresetValues(key); 481 if (res != null) 482 return res; 483 if (additionalPresetsValueData.contains(key)) 484 return Collections.emptySet(); 485 // null means key is not known 486 return null; 487 } 488 489 /** 490 * Determines if the given key is in internal presets. 491 * @param key key 492 * @return {@code true} if the given key is in internal presets 493 * @since 9023 494 */ 495 public static boolean isKeyInPresets(String key) { 496 return TaggingPresets.getPresetValues(key) != null; 497 } 498 499 /** 500 * Determines if the given tag is in internal presets. 501 * @param key key 502 * @param value value 503 * @return {@code true} if the given tag is in internal presets 504 * @since 9023 505 */ 506 public static boolean isTagInPresets(String key, String value) { 507 final Set<String> values = getPresetValues(key); 508 return values != null && values.contains(value); 509 } 510 511 /** 512 * Returns the list of ignored tags. 513 * @return the list of ignored tags 514 * @since 9023 515 */ 516 public static List<Tag> getIgnoredTags() { 517 return new ArrayList<>(ignoreDataTag); 518 } 519 520 /** 521 * Determines if the given tag key is ignored for checks "key/tag not in presets". 
522 * @param key key 523 * @return true if the given key is ignored 524 */ 525 private static boolean isKeyIgnored(String key) { 526 if (ignoreDataEquals.contains(key)) { 527 return true; 528 } 529 for (String a : ignoreDataStartsWith) { 530 if (key.startsWith(a)) { 531 return true; 532 } 533 } 534 for (String a : ignoreDataEndsWith) { 535 if (key.endsWith(a)) { 536 return true; 537 } 538 } 539 return false; 540 } 541 542 /** 543 * Determines if the given tag is ignored for checks "key/tag not in presets". 544 * @param key key 545 * @param value value 546 * @return {@code true} if the given tag is ignored 547 * @since 9023 548 */ 549 public static boolean isTagIgnored(String key, String value) { 550 if (isKeyIgnored(key)) 551 return true; 552 final Set<String> values = getPresetValues(key); 553 if (values != null && values.isEmpty()) 554 return true; 555 if (!isTagInPresets(key, value)) { 556 for (Tag a : ignoreDataTag) { 557 if (key.equals(a.getKey()) && value.equals(a.getValue())) { 558 return true; 559 } 560 } 561 } 562 return false; 563 } 564 565 /** 566 * Checks the primitive tags 567 * @param p The primitive to check 568 */ 569 @Override 570 public void check(OsmPrimitive p) { 571 if (!p.isTagged()) 572 return; 573 574 // Just a collection to know if a primitive has been already marked with error 575 MultiMap<OsmPrimitive, String> withErrors = new MultiMap<>(); 576 577 for (Entry<String, String> prop : p.getKeys().entrySet()) { 578 String s = marktr("Tag ''{0}'' invalid."); 579 String key = prop.getKey(); 580 String value = prop.getValue(); 581 582 if (checkKeys) { 583 checkSingleTagKeySimple(withErrors, p, s, key); 584 } 585 if (checkValues) { 586 checkSingleTagValueSimple(withErrors, p, s, key, value); 587 checkSingleTagComplex(withErrors, p, key, value); 588 } 589 if (checkFixmes && key != null && value != null && !value.isEmpty() && isFixme(key, value) && !withErrors.contains(p, "FIXME")) { 590 errors.add(TestError.builder(this, Severity.OTHER, FIXME) 591 
.message(tr("FIXMES")) 592 .primitives(p) 593 .build()); 594 withErrors.put(p, "FIXME"); 595 } 596 } 597 } 598 599 private void checkSingleTagValueSimple(MultiMap<OsmPrimitive, String> withErrors, OsmPrimitive p, String s, String key, String value) { 600 if (!checkValues || value == null) 601 return; 602 if ((containsUnwantedNonPrintingControlCharacter(value)) && !withErrors.contains(p, "ICV")) { 603 errors.add(TestError.builder(this, Severity.WARNING, LOW_CHAR_VALUE) 604 .message(tr("Tag value contains non-printing character"), s, key) 605 .primitives(p) 606 .fix(() -> new ChangePropertyCommand(p, key, removeUnwantedNonPrintingControlCharacters(value))) 607 .build()); 608 withErrors.put(p, "ICV"); 609 } 610 if ((containsUnusualUnicodeCharacter(key, value)) && !withErrors.contains(p, "UUCV")) { 611 errors.add(TestError.builder(this, Severity.WARNING, UNUSUAL_UNICODE_CHAR_VALUE) 612 .message(tr("Tag value contains unusual Unicode character"), s, key) 613 .primitives(p) 614 .build()); 615 withErrors.put(p, "UUCV"); 616 } 617 if ((value.length() > Tagged.MAX_TAG_LENGTH) && !withErrors.contains(p, "LV")) { 618 errors.add(TestError.builder(this, Severity.ERROR, LONG_VALUE) 619 .message(tr("Tag value longer than {0} characters ({1} characters)", Tagged.MAX_TAG_LENGTH, value.length()), s, key) 620 .primitives(p) 621 .build()); 622 withErrors.put(p, "LV"); 623 } 624 if ((value.trim().isEmpty()) && !withErrors.contains(p, "EV")) { 625 errors.add(TestError.builder(this, Severity.WARNING, EMPTY_VALUES) 626 .message(tr("Tags with empty values"), s, key) 627 .primitives(p) 628 .build()); 629 withErrors.put(p, "EV"); 630 } 631 final String errTypeSpace = "SPACE"; 632 if ((value.startsWith(" ") || value.endsWith(" ")) && !withErrors.contains(p, errTypeSpace)) { 633 errors.add(TestError.builder(this, Severity.WARNING, INVALID_SPACE) 634 .message(tr("Property values start or end with white space"), s, key) 635 .primitives(p) 636 .build()); 637 withErrors.put(p, errTypeSpace); 638 } 
639 if (value.contains(" ") && !withErrors.contains(p, errTypeSpace)) { 640 errors.add(TestError.builder(this, Severity.WARNING, MULTIPLE_SPACES) 641 .message(tr("Property values contain multiple white spaces"), s, key) 642 .primitives(p) 643 .build()); 644 withErrors.put(p, errTypeSpace); 645 } 646 if (!value.equals(Entities.unescape(value)) && !withErrors.contains(p, "HTML")) { 647 errors.add(TestError.builder(this, Severity.OTHER, INVALID_HTML) 648 .message(tr("Property values contain HTML entity"), s, key) 649 .primitives(p) 650 .build()); 651 withErrors.put(p, "HTML"); 652 } 653 } 654 655 private void checkSingleTagKeySimple(MultiMap<OsmPrimitive, String> withErrors, OsmPrimitive p, String s, String key) { 656 if (!checkKeys || key == null) 657 return; 658 if ((containsUnwantedNonPrintingControlCharacter(key)) && !withErrors.contains(p, "ICK")) { 659 errors.add(TestError.builder(this, Severity.WARNING, LOW_CHAR_KEY) 660 .message(tr("Tag key contains non-printing character"), s, key) 661 .primitives(p) 662 .fix(() -> new ChangePropertyCommand(p, key, removeUnwantedNonPrintingControlCharacters(key))) 663 .build()); 664 withErrors.put(p, "ICK"); 665 } 666 if (key.length() > Tagged.MAX_TAG_LENGTH && !withErrors.contains(p, "LK")) { 667 errors.add(TestError.builder(this, Severity.ERROR, LONG_KEY) 668 .message(tr("Tag key longer than {0} characters ({1} characters)", Tagged.MAX_TAG_LENGTH, key.length()), s, key) 669 .primitives(p) 670 .build()); 671 withErrors.put(p, "LK"); 672 } 673 if (key.indexOf(' ') >= 0 && !withErrors.contains(p, "IPK")) { 674 errors.add(TestError.builder(this, Severity.WARNING, INVALID_KEY_SPACE) 675 .message(tr("Invalid white space in property key"), s, key) 676 .primitives(p) 677 .build()); 678 withErrors.put(p, "IPK"); 679 } 680 } 681 682 private void checkSingleTagComplex(MultiMap<OsmPrimitive, String> withErrors, OsmPrimitive p, String key, String value) { 683 if (!checkValues || key == null || value == null || value.isEmpty()) 684 
return; 685 if (additionalPresetsValueData != null && !isTagIgnored(key, value)) { 686 if (!isKeyInPresets(key)) { 687 spellCheckKey(withErrors, p, key); 688 } else if (!isTagInPresets(key, value)) { 689 if (oftenUsedTags.contains(key, value)) { 690 // tag is quite often used but not in presets 691 errors.add(TestError.builder(this, Severity.OTHER, INVALID_VALUE) 692 .message(tr("Presets do not contain property value"), 693 marktr("Value ''{0}'' for key ''{1}'' not in presets, but is known."), value, key) 694 .primitives(p) 695 .build()); 696 withErrors.put(p, "UPV"); 697 } else { 698 tryGuess(p, key, value, withErrors); 699 } 700 } 701 } 702 } 703 704 private void spellCheckKey(MultiMap<OsmPrimitive, String> withErrors, OsmPrimitive p, String key) { 705 String prettifiedKey = harmonizeKey(key); 706 String fixedKey; 707 if (ignoreDataEquals.contains(prettifiedKey)) { 708 fixedKey = prettifiedKey; 709 } else { 710 fixedKey = isKeyInPresets(prettifiedKey) ? prettifiedKey : harmonizedKeys.get(prettifiedKey); 711 } 712 if (fixedKey == null) { 713 for (Tag a : ignoreDataTag) { 714 if (a.getKey().equals(prettifiedKey)) { 715 fixedKey = prettifiedKey; 716 break; 717 } 718 } 719 } 720 721 if (fixedKey != null && !"".equals(fixedKey) && !fixedKey.equals(key)) { 722 final String proposedKey = fixedKey; 723 // misspelled preset key 724 final TestError.Builder error = TestError.builder(this, Severity.WARNING, MISSPELLED_KEY) 725 .message(tr("Misspelled property key"), marktr("Key ''{0}'' looks like ''{1}''."), key, proposedKey) 726 .primitives(p); 727 if (p.hasKey(fixedKey)) { 728 errors.add(error.build()); 729 } else { 730 errors.add(error.fix(() -> new ChangePropertyKeyCommand(p, key, proposedKey)).build()); 731 } 732 withErrors.put(p, "WPK"); 733 } else { 734 errors.add(TestError.builder(this, Severity.OTHER, INVALID_KEY) 735 .message(tr("Presets do not contain property key"), marktr("Key ''{0}'' not in presets."), key) 736 .primitives(p) 737 .build()); 738 
withErrors.put(p, "UPK"); 739 } 740 } 741 742 private void tryGuess(OsmPrimitive p, String key, String value, MultiMap<OsmPrimitive, String> withErrors) { 743 // try to fix common typos and check again if value is still unknown 744 final String harmonizedValue = harmonizeValue(value); 745 if (harmonizedValue == null || harmonizedValue.isEmpty()) 746 return; 747 String fixedValue = null; 748 List<Set<String>> sets = new ArrayList<>(); 749 Set<String> presetValues = getPresetValues(key); 750 if (presetValues != null) 751 sets.add(presetValues); 752 Set<String> usedValues = oftenUsedTags.get(key); 753 if (usedValues != null) 754 sets.add(usedValues); 755 for (Set<String> possibleValues: sets) { 756 if (possibleValues.contains(harmonizedValue)) { 757 fixedValue = harmonizedValue; 758 break; 759 } 760 } 761 if (fixedValue == null && !ignoreForLevenshtein.contains(key)) { 762 int maxPresetValueLen = 0; 763 List<String> fixVals = new ArrayList<>(); 764 // use Levenshtein distance to find typical typos 765 int minDist = MAX_LEVENSHTEIN_DISTANCE + 1; 766 String closest = null; 767 for (Set<String> possibleValues: sets) { 768 for (String possibleVal : possibleValues) { 769 if (possibleVal.isEmpty()) 770 continue; 771 maxPresetValueLen = Math.max(maxPresetValueLen, possibleVal.length()); 772 if (harmonizedValue.length() < 3 && possibleVal.length() >= harmonizedValue.length() + MAX_LEVENSHTEIN_DISTANCE) { 773 // don't suggest fix value when given value is short and lengths are too different 774 // for example surface=u would result in surface=mud 775 continue; 776 } 777 int dist = Utils.getLevenshteinDistance(possibleVal, harmonizedValue); 778 if (dist >= harmonizedValue.length()) { 779 // short value, all characters are different. Don't warn, might say Value '10' for key 'fee' looks like 'no'. 
780 continue; 781 } 782 if (dist < minDist) { 783 closest = possibleVal; 784 minDist = dist; 785 fixVals.clear(); 786 fixVals.add(possibleVal); 787 } else if (dist == minDist) { 788 fixVals.add(possibleVal); 789 } 790 } 791 } 792 793 if (minDist <= MAX_LEVENSHTEIN_DISTANCE && maxPresetValueLen > MAX_LEVENSHTEIN_DISTANCE 794 && (harmonizedValue.length() > 3 || minDist < MAX_LEVENSHTEIN_DISTANCE)) { 795 if (fixVals.size() < 2) { 796 fixedValue = closest; 797 } else { 798 Collections.sort(fixVals); 799 // misspelled preset value with multiple good alternatives 800 errors.add(TestError.builder(this, Severity.WARNING, MISSPELLED_VALUE_NO_FIX) 801 .message(tr("Unknown property value"), 802 marktr("Value ''{0}'' for key ''{1}'' is unknown, maybe one of {2} is meant?"), 803 value, key, fixVals) 804 .primitives(p).build()); 805 withErrors.put(p, "WPV"); 806 return; 807 } 808 } 809 } 810 if (fixedValue != null && !fixedValue.equals(value)) { 811 final String newValue = fixedValue; 812 // misspelled preset value 813 errors.add(TestError.builder(this, Severity.WARNING, MISSPELLED_VALUE) 814 .message(tr("Unknown property value"), 815 marktr("Value ''{0}'' for key ''{1}'' is unknown, maybe ''{2}'' is meant?"), value, key, newValue) 816 .primitives(p) 817 .build()); 818 withErrors.put(p, "WPV"); 819 } else { 820 // unknown preset value 821 errors.add(TestError.builder(this, Severity.OTHER, INVALID_VALUE) 822 .message(tr("Presets do not contain property value"), 823 marktr("Value ''{0}'' for key ''{1}'' not in presets."), value, key) 824 .primitives(p) 825 .build()); 826 withErrors.put(p, "UPV"); 827 } 828 } 829 830 private static boolean isNum(String harmonizedValue) { 831 try { 832 Double.parseDouble(harmonizedValue); 833 return true; 834 } catch (NumberFormatException e) { 835 return false; 836 } 837 } 838 839 private static boolean isFixme(String key, String value) { 840 return key.toLowerCase(Locale.ENGLISH).contains("fixme") || key.contains("todo") 841 || 
value.toLowerCase(Locale.ENGLISH).contains("fixme") || value.contains("check and delete"); 842 } 843 844 private static String harmonizeKey(String key) { 845 return Utils.strip(key.toLowerCase(Locale.ENGLISH).replace('-', '_').replace(':', '_').replace(' ', '_'), "-_;:,"); 846 } 847 848 private static String harmonizeValue(String value) { 849 return Utils.strip(value.toLowerCase(Locale.ENGLISH).replace('-', '_').replace(' ', '_'), "-_;:,"); 850 } 851 852 @Override 853 public void startTest(ProgressMonitor monitor) { 854 super.startTest(monitor); 855 checkKeys = Config.getPref().getBoolean(PREF_CHECK_KEYS, true); 856 if (isBeforeUpload) { 857 checkKeys = checkKeys && Config.getPref().getBoolean(PREF_CHECK_KEYS_BEFORE_UPLOAD, true); 858 } 859 860 checkValues = Config.getPref().getBoolean(PREF_CHECK_VALUES, true); 861 if (isBeforeUpload) { 862 checkValues = checkValues && Config.getPref().getBoolean(PREF_CHECK_VALUES_BEFORE_UPLOAD, true); 863 } 864 865 checkComplex = Config.getPref().getBoolean(PREF_CHECK_COMPLEX, true); 866 if (isBeforeUpload) { 867 checkComplex = checkComplex && Config.getPref().getBoolean(PREF_CHECK_COMPLEX_BEFORE_UPLOAD, true); 868 } 869 870 checkFixmes = Config.getPref().getBoolean(PREF_CHECK_FIXMES, true); 871 if (isBeforeUpload) { 872 checkFixmes = checkFixmes && Config.getPref().getBoolean(PREF_CHECK_FIXMES_BEFORE_UPLOAD, true); 873 } 874 } 875 876 @Override 877 public void visit(Collection<OsmPrimitive> selection) { 878 if (checkKeys || checkValues || checkComplex || checkFixmes) { 879 super.visit(selection); 880 } 881 } 882 883 @Override 884 public void addGui(JPanel testPanel) { 885 GBC a = GBC.eol(); 886 a.anchor = GridBagConstraints.EAST; 887 888 testPanel.add(new JLabel(name+" :"), GBC.eol().insets(3, 0, 0, 0)); 889 890 prefCheckKeys = new JCheckBox(tr("Check property keys."), Config.getPref().getBoolean(PREF_CHECK_KEYS, true)); 891 prefCheckKeys.setToolTipText(tr("Validate that property keys are valid checking against list of words.")); 
892 testPanel.add(prefCheckKeys, GBC.std().insets(20, 0, 0, 0)); 893 894 prefCheckKeysBeforeUpload = new JCheckBox(); 895 prefCheckKeysBeforeUpload.setSelected(Config.getPref().getBoolean(PREF_CHECK_KEYS_BEFORE_UPLOAD, true)); 896 testPanel.add(prefCheckKeysBeforeUpload, a); 897 898 prefCheckComplex = new JCheckBox(tr("Use complex property checker."), Config.getPref().getBoolean(PREF_CHECK_COMPLEX, true)); 899 prefCheckComplex.setToolTipText(tr("Validate property values and tags using complex rules.")); 900 testPanel.add(prefCheckComplex, GBC.std().insets(20, 0, 0, 0)); 901 902 prefCheckComplexBeforeUpload = new JCheckBox(); 903 prefCheckComplexBeforeUpload.setSelected(Config.getPref().getBoolean(PREF_CHECK_COMPLEX_BEFORE_UPLOAD, true)); 904 testPanel.add(prefCheckComplexBeforeUpload, a); 905 906 final Collection<String> sources = Config.getPref().getList(PREF_SOURCES, DEFAULT_SOURCES); 907 sourcesList = new EditableList(tr("TagChecker source")); 908 sourcesList.setItems(sources); 909 testPanel.add(new JLabel(tr("Data sources ({0})", "*.cfg")), GBC.eol().insets(23, 0, 0, 0)); 910 testPanel.add(sourcesList, GBC.eol().fill(GridBagConstraints.HORIZONTAL).insets(23, 0, 0, 0)); 911 912 ActionListener disableCheckActionListener = e -> handlePrefEnable(); 913 prefCheckKeys.addActionListener(disableCheckActionListener); 914 prefCheckKeysBeforeUpload.addActionListener(disableCheckActionListener); 915 prefCheckComplex.addActionListener(disableCheckActionListener); 916 prefCheckComplexBeforeUpload.addActionListener(disableCheckActionListener); 917 918 handlePrefEnable(); 919 920 prefCheckValues = new JCheckBox(tr("Check property values."), Config.getPref().getBoolean(PREF_CHECK_VALUES, true)); 921 prefCheckValues.setToolTipText(tr("Validate that property values are valid checking against presets.")); 922 testPanel.add(prefCheckValues, GBC.std().insets(20, 0, 0, 0)); 923 924 prefCheckValuesBeforeUpload = new JCheckBox(); 925 
prefCheckValuesBeforeUpload.setSelected(Config.getPref().getBoolean(PREF_CHECK_VALUES_BEFORE_UPLOAD, true)); 926 testPanel.add(prefCheckValuesBeforeUpload, a); 927 928 prefCheckFixmes = new JCheckBox(tr("Check for FIXMES."), Config.getPref().getBoolean(PREF_CHECK_FIXMES, true)); 929 prefCheckFixmes.setToolTipText(tr("Looks for nodes or ways with FIXME in any property value.")); 930 testPanel.add(prefCheckFixmes, GBC.std().insets(20, 0, 0, 0)); 931 932 prefCheckFixmesBeforeUpload = new JCheckBox(); 933 prefCheckFixmesBeforeUpload.setSelected(Config.getPref().getBoolean(PREF_CHECK_FIXMES_BEFORE_UPLOAD, true)); 934 testPanel.add(prefCheckFixmesBeforeUpload, a); 935 } 936 937 /** 938 * Enables/disables the source list field 939 */ 940 public void handlePrefEnable() { 941 boolean selected = prefCheckKeys.isSelected() || prefCheckKeysBeforeUpload.isSelected() 942 || prefCheckComplex.isSelected() || prefCheckComplexBeforeUpload.isSelected(); 943 sourcesList.setEnabled(selected); 944 } 945 946 @Override 947 public boolean ok() { 948 enabled = prefCheckKeys.isSelected() || prefCheckValues.isSelected() || prefCheckComplex.isSelected() || prefCheckFixmes.isSelected(); 949 testBeforeUpload = prefCheckKeysBeforeUpload.isSelected() || prefCheckValuesBeforeUpload.isSelected() 950 || prefCheckFixmesBeforeUpload.isSelected() || prefCheckComplexBeforeUpload.isSelected(); 951 952 Config.getPref().putBoolean(PREF_CHECK_VALUES, prefCheckValues.isSelected()); 953 Config.getPref().putBoolean(PREF_CHECK_COMPLEX, prefCheckComplex.isSelected()); 954 Config.getPref().putBoolean(PREF_CHECK_KEYS, prefCheckKeys.isSelected()); 955 Config.getPref().putBoolean(PREF_CHECK_FIXMES, prefCheckFixmes.isSelected()); 956 Config.getPref().putBoolean(PREF_CHECK_VALUES_BEFORE_UPLOAD, prefCheckValuesBeforeUpload.isSelected()); 957 Config.getPref().putBoolean(PREF_CHECK_COMPLEX_BEFORE_UPLOAD, prefCheckComplexBeforeUpload.isSelected()); 958 Config.getPref().putBoolean(PREF_CHECK_KEYS_BEFORE_UPLOAD, 
prefCheckKeysBeforeUpload.isSelected()); 959 Config.getPref().putBoolean(PREF_CHECK_FIXMES_BEFORE_UPLOAD, prefCheckFixmesBeforeUpload.isSelected()); 960 return Config.getPref().putList(PREF_SOURCES, sourcesList.getItems()); 961 } 962 963 @Override 964 public Command fixError(TestError testError) { 965 List<Command> commands = new ArrayList<>(50); 966 967 Collection<? extends OsmPrimitive> primitives = testError.getPrimitives(); 968 for (OsmPrimitive p : primitives) { 969 Map<String, String> tags = p.getKeys(); 970 if (tags.isEmpty()) { 971 continue; 972 } 973 974 for (Entry<String, String> prop: tags.entrySet()) { 975 String key = prop.getKey(); 976 String value = prop.getValue(); 977 if (value == null || value.trim().isEmpty()) { 978 commands.add(new ChangePropertyCommand(p, key, null)); 979 } else if (value.startsWith(" ") || value.endsWith(" ") || value.contains(" ")) { 980 commands.add(new ChangePropertyCommand(p, key, Utils.removeWhiteSpaces(value))); 981 } else if (key.startsWith(" ") || key.endsWith(" ") || key.contains(" ")) { 982 commands.add(new ChangePropertyKeyCommand(p, key, Utils.removeWhiteSpaces(key))); 983 } else { 984 String evalue = Entities.unescape(value); 985 if (!evalue.equals(value)) { 986 commands.add(new ChangePropertyCommand(p, key, evalue)); 987 } 988 } 989 } 990 } 991 992 if (commands.isEmpty()) 993 return null; 994 if (commands.size() == 1) 995 return commands.get(0); 996 997 return new SequenceCommand(tr("Fix tags"), commands); 998 } 999 1000 @Override 1001 public boolean isFixable(TestError testError) { 1002 if (testError.getTester() instanceof TagChecker) { 1003 int code = testError.getCode(); 1004 return code == EMPTY_VALUES || code == INVALID_SPACE || 1005 code == INVALID_KEY_SPACE || code == INVALID_HTML || 1006 code == MULTIPLE_SPACES; 1007 } 1008 1009 return false; 1010 } 1011}