// License: GPL. For details, see LICENSE file.
package org.openstreetmap.josm.io;

import java.io.IOException;
import java.io.Reader;

import org.openstreetmap.josm.tools.Logging;

/**
 * A {@link Reader} wrapper that replaces characters which are not allowed in
 * an XML 1.0 document.
 *
 * Although these characters are forbidden, in the real world they still show
 * up in XML files. Java's SAX parser throws an exception on them, so they have
 * to be filtered at a lower level, before the parser sees the data.
 *
 * Only control characters (below 0x20) are handled; tab, line feed and
 * carriage return are left untouched. Every other control character is
 * replaced by a space (0x20).
 */
public class InvalidXmlCharacterFilter extends Reader {

    /**
     * Lookup table indexed by character code (0x00 to 0x1F); an entry is
     * {@code true} when the corresponding character must be replaced.
     */
    private static final boolean[] INVALID_CHARS = buildInvalidCharTable();

    /** Shared by all instances so that only the first replacement is logged. */
    private static boolean firstWarning = true;

    /** The underlying reader that supplies the unfiltered characters. */
    private final Reader reader;

    /**
     * Constructs a new {@code InvalidXmlCharacterFilter} for the given Reader.
     * @param reader The reader to filter
     */
    public InvalidXmlCharacterFilter(Reader reader) {
        this.reader = reader;
    }

    /**
     * Builds the table of control characters that have to be replaced.
     * @return an array of length 0x20 with {@code true} for every control
     *         character except tab, LF and CR
     */
    private static boolean[] buildInvalidCharTable() {
        final boolean[] table = new boolean[0x20];
        for (int code = 0; code < table.length; code++) {
            // tab (0x9), LF (0xA) and CR (0xD) are kept as they are
            table[code] = code != 0x9 && code != 0xA && code != 0xD;
        }
        return table;
    }

    /**
     * Reads from the wrapped reader and replaces invalid characters in the
     * portion of the buffer that was just filled.
     * @param b destination buffer
     * @param off offset in {@code b} at which to start storing characters
     * @param len maximum number of characters to read
     * @return the value returned by the wrapped reader: the number of
     *         characters read, or -1 at the end of the stream
     * @throws IOException if the wrapped reader throws it
     */
    @Override
    public int read(char[] b, int off, int len) throws IOException {
        final int count = reader.read(b, off, len);
        if (count < 0) {
            // end of stream: nothing was stored in the buffer
            return count;
        }
        final int end = off + count;
        for (int pos = off; pos < end; pos++) {
            b[pos] = sanitize(b[pos]);
        }
        return count;
    }

    /**
     * Closes the wrapped reader.
     * @throws IOException if the wrapped reader throws it
     */
    @Override
    public void close() throws IOException {
        reader.close();
    }

    /**
     * Maps a single character to its filtered value.
     * @param in the character read from the wrapped reader
     * @return a space if {@code in} is an invalid control character,
     *         otherwise {@code in} unchanged
     */
    private static char sanitize(char in) {
        // The length check keeps the table lookup within bounds.
        if (in >= INVALID_CHARS.length || !INVALID_CHARS[in]) {
            return in;
        }
        if (firstWarning) {
            Logging.warn("Invalid xml character encountered: '"+in+"'.");
            firstWarning = false;
        }
        return ' ';
    }
}