1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package com.sun.syndication.unittest;
18
19 import com.sun.syndication.io.XmlReader;
20 import junit.framework.TestCase;
21
22 import java.io.*;
23 import java.text.MessageFormat;
24 import java.util.HashMap;
25 import java.util.Map;
26
27
28
29
30
31 public class TestXmlReader extends TestCase {
32 private static final String XML5 = "xml-prolog-encoding-spaced-single-quotes";
33 private static final String XML4 = "xml-prolog-encoding-single-quotes";
34 private static final String XML3 = "xml-prolog-encoding-double-quotes";
35 private static final String XML2 = "xml-prolog";
36 private static final String XML1 = "xml";
37
38 public static void main(String[] args) throws Exception {
39 TestXmlReader test = new TestXmlReader();
40 test.testRawBom();
41 test.testRawNoBom();
42 test.testHttp();
43 }
44
45 protected void _testRawNoBomValid(String encoding) throws Exception {
46 InputStream is = getXmlStream("no-bom",XML1,encoding,encoding);
47 XmlReader xmlReader = new XmlReader(is,false);
48 assertEquals(xmlReader.getEncoding(),"UTF-8");
49
50 is = getXmlStream("no-bom",XML2,encoding,encoding);
51 xmlReader = new XmlReader(is);
52 assertEquals(xmlReader.getEncoding(),"UTF-8");
53
54 is = getXmlStream("no-bom",XML3,encoding,encoding);
55 xmlReader = new XmlReader(is);
56 assertEquals(xmlReader.getEncoding(),encoding);
57
58 is = getXmlStream("no-bom", XML4, encoding, encoding);
59 xmlReader = new XmlReader(is);
60 assertEquals(xmlReader.getEncoding(), encoding);
61
62 is = getXmlStream("no-bom", XML5, encoding, encoding);
63 xmlReader = new XmlReader(is);
64 assertEquals(xmlReader.getEncoding(), encoding);
65 }
66
67 protected void _testRawNoBomInvalid(String encoding) throws Exception {
68 InputStream is = getXmlStream("no-bom",XML3,encoding,encoding);
69 try {
70 XmlReader xmlReader = new XmlReader(is,false);
71 fail("It should have failed");
72 }
73 catch (IOException ex) {
74 assertTrue(ex.getMessage().indexOf("Invalid encoding,")>-1);
75 }
76 }
77
78 public void testRawNoBom() throws Exception {
79 _testRawNoBomValid("US-ASCII");
80 _testRawNoBomValid("UTF-8");
81 _testRawNoBomValid("ISO-8859-1");
82 }
83
84 protected void _testRawBomValid(String encoding) throws Exception {
85 InputStream is = getXmlStream(encoding+"-bom",XML3,encoding,encoding);
86 XmlReader xmlReader = new XmlReader(is,false);
87 if (!encoding.equals("UTF-16")) {
88 assertEquals(xmlReader.getEncoding(),encoding);
89 }
90 else {
91 assertEquals(xmlReader.getEncoding().substring(0,encoding.length()),encoding);
92 }
93 }
94
95 protected void _testRawBomInvalid(String bomEnc,String streamEnc,String prologEnc) throws Exception {
96 InputStream is = getXmlStream(bomEnc,XML3,streamEnc,prologEnc);
97 try {
98 XmlReader xmlReader = new XmlReader(is,false);
99 fail("It should have failed for BOM "+bomEnc+", streamEnc "+streamEnc+" and prologEnc "+prologEnc);
100 }
101 catch (IOException ex) {
102 assertTrue(ex.getMessage().indexOf("Invalid encoding,")>-1);
103 }
104 }
105
106 public void testRawBom() throws Exception {
107 _testRawBomValid("UTF-8");
108 _testRawBomValid("UTF-16BE");
109 _testRawBomValid("UTF-16LE");
110 _testRawBomValid("UTF-16");
111
112 _testRawBomInvalid("UTF-8-bom","US-ASCII","US-ASCII");
113 _testRawBomInvalid("UTF-8-bom","ISO-8859-1","ISO-8859-1");
114 _testRawBomInvalid("UTF-8-bom","UTF-8","UTF-16");
115 _testRawBomInvalid("UTF-8-bom","UTF-8","UTF-16BE");
116 _testRawBomInvalid("UTF-8-bom","UTF-8","UTF-16LE");
117 _testRawBomInvalid("UTF-16BE-bom","UTF-16BE","UTF-16LE");
118 _testRawBomInvalid("UTF-16LE-bom","UTF-16LE","UTF-16BE");
119 _testRawBomInvalid("UTF-16LE-bom","UTF-16LE","UTF-8");
120 }
121
122 public void testHttp() throws Exception {
123 _testHttpValid("application/xml","no-bom","US-ASCII",null);
124 _testHttpValid("application/xml","UTF-8-bom","US-ASCII",null);
125 _testHttpValid("application/xml","UTF-8-bom","UTF-8",null);
126 _testHttpValid("application/xml","UTF-8-bom","UTF-8","UTF-8");
127 _testHttpValid("application/xml;charset=UTF-8","UTF-8-bom","UTF-8",null);
128 _testHttpValid("application/xml;charset=UTF-8","UTF-8-bom","UTF-8","UTF-8");
129 _testHttpValid("application/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE",null);
130 _testHttpValid("application/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE","UTF-16");
131 _testHttpValid("application/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE","UTF-16BE");
132
133 _testHttpInvalid("application/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE",null);
134 _testHttpInvalid("application/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE","UTF-16");
135 _testHttpInvalid("application/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE","UTF-16BE");
136 _testHttpInvalid("application/xml","UTF-8-bom","US-ASCII","US-ASCII");
137 _testHttpInvalid("application/xml;charset=UTF-16","UTF-16LE","UTF-8","UTF-8");
138 _testHttpInvalid("application/xml;charset=UTF-16","no-bom","UTF-16BE","UTF-16BE");
139
140 _testHttpValid("text/xml","no-bom","US-ASCII",null);
141 _testHttpValid("text/xml;charset=UTF-8","UTF-8-bom","UTF-8","UTF-8");
142 _testHttpValid("text/xml;charset=UTF-8","UTF-8-bom","UTF-8",null);
143 _testHttpValid("text/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE",null);
144 _testHttpValid("text/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE","UTF-16");
145 _testHttpValid("text/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE","UTF-16BE");
146 _testHttpValid("text/xml","UTF-8-bom","US-ASCII",null);
147
148 _testHttpInvalid("text/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE",null);
149 _testHttpInvalid("text/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE","UTF-16");
150 _testHttpInvalid("text/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE","UTF-16BE");
151 _testHttpInvalid("text/xml;charset=UTF-16","no-bom","UTF-16BE","UTF-16BE");
152 _testHttpInvalid("text/xml;charset=UTF-16","no-bom","UTF-16BE",null);
153
154 _testHttpLenient("text/xml","no-bom","US-ASCII",null, "US-ASCII");
155 _testHttpLenient("text/xml;charset=UTF-8","UTF-8-bom","UTF-8","UTF-8", "UTF-8");
156 _testHttpLenient("text/xml;charset=UTF-8","UTF-8-bom","UTF-8",null, "UTF-8");
157 _testHttpLenient("text/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE",null, "UTF-16BE");
158 _testHttpLenient("text/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE","UTF-16", "UTF-16");
159 _testHttpLenient("text/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE","UTF-16BE", "UTF-16BE");
160 _testHttpLenient("text/xml","UTF-8-bom","US-ASCII",null, "US-ASCII");
161
162 _testHttpLenient("text/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE",null, "UTF-16BE");
163 _testHttpLenient("text/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE","UTF-16", "UTF-16");
164 _testHttpLenient("text/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE","UTF-16BE", "UTF-16BE");
165 _testHttpLenient("text/xml;charset=UTF-16","no-bom","UTF-16BE","UTF-16BE", "UTF-16BE");
166 _testHttpLenient("text/xml;charset=UTF-16","no-bom","UTF-16BE",null, "UTF-16");
167
168 _testHttpLenient("text/html","no-bom","US-ASCII","US-ASCII", "US-ASCII");
169 _testHttpLenient("text/html","no-bom","US-ASCII",null, "US-ASCII");
170 _testHttpLenient("text/html;charset=UTF-8","no-bom","US-ASCII","UTF-8", "UTF-8");
171 _testHttpLenient("text/html;charset=UTF-16BE","no-bom","US-ASCII","UTF-8", "UTF-8");
172 }
173
174 public void _testHttpValid(String cT,String bomEnc,String streamEnc,String prologEnc) throws Exception {
175 InputStream is = getXmlStream(bomEnc,(prologEnc==null)?XML1 :XML3,streamEnc,prologEnc);
176 XmlReader xmlReader = new XmlReader(is,cT,false);
177 if (!streamEnc.equals("UTF-16")) {
178
179 }
180 else {
181 assertEquals(xmlReader.getEncoding().substring(0,streamEnc.length()),streamEnc);
182 }
183 }
184
185 protected void _testHttpInvalid(String cT,String bomEnc,String streamEnc,String prologEnc) throws Exception {
186 InputStream is = getXmlStream(bomEnc,(prologEnc==null)?XML2 :XML3,streamEnc,prologEnc);
187 try {
188 new XmlReader(is,cT,false);
189 fail("It should have failed for HTTP Content-type "+cT+", BOM "+bomEnc+", streamEnc "+streamEnc+" and prologEnc "+prologEnc);
190 }
191 catch (IOException ex) {
192 assertTrue(ex.getMessage().indexOf("Invalid encoding,")>-1);
193 }
194 }
195
196 protected void _testHttpLenient(String cT, String bomEnc, String streamEnc, String prologEnc, String shouldbe) throws Exception {
197 InputStream is = getXmlStream(bomEnc,(prologEnc==null)?XML2 :XML3,streamEnc,prologEnc);
198 XmlReader xmlReader = new XmlReader(is,cT,true);
199 assertEquals(xmlReader.getEncoding(),shouldbe);
200 }
201
202
203
204 private static final int[] NO_BOM_BYTES = {};
205 private static final int[] UTF_16BE_BOM_BYTES = {0xFE,0xFF};
206 private static final int[] UTF_16LE_BOM_BYTES = {0xFF,0XFE};
207 private static final int[] UTF_8_BOM_BYTES = {0xEF,0xBB,0xBF};
208
209 private static final Map BOMs = new HashMap();
210
211 static {
212 BOMs.put("no-bom",NO_BOM_BYTES);
213 BOMs.put("UTF-16BE-bom",UTF_16BE_BOM_BYTES);
214 BOMs.put("UTF-16LE-bom",UTF_16LE_BOM_BYTES);
215 BOMs.put("UTF-16-bom",NO_BOM_BYTES);
216 BOMs.put("UTF-8-bom",UTF_8_BOM_BYTES);
217 }
218
219 private static final MessageFormat XML = new MessageFormat(
220 "<root>{2}</root>");
221 private static final MessageFormat XML_WITH_PROLOG = new MessageFormat(
222 "<?xml version=\"1.0\"?>\n<root>{2}</root>");
223 private static final MessageFormat XML_WITH_PROLOG_AND_ENCODING_DOUBLE_QUOTES = new MessageFormat(
224 "<?xml version=\"1.0\" encoding=\"{1}\"?>\n<root>{2}</root>");
225 private static final MessageFormat XML_WITH_PROLOG_AND_ENCODING_SINGLE_QUOTES = new MessageFormat(
226 "<?xml version=\"1.0\" encoding=''{1}''?>\n<root>{2}</root>");
227 private static final MessageFormat XML_WITH_PROLOG_AND_ENCODING_SPACED_SINGLE_QUOTES = new MessageFormat(
228 "<?xml version=\"1.0\" encoding = \t \n \r''{1}''?>\n<root>{2}</root>");
229
230 private static final MessageFormat INFO = new MessageFormat(
231 "\nBOM : {0}\nDoc : {1}\nStream Enc : {2}\nProlog Enc : {3}\n");
232
233 private static final Map XMLs = new HashMap();
234
235 static {
236 XMLs.put(XML1, XML);
237 XMLs.put(XML2, XML_WITH_PROLOG);
238 XMLs.put(XML3, XML_WITH_PROLOG_AND_ENCODING_DOUBLE_QUOTES);
239 XMLs.put(XML4, XML_WITH_PROLOG_AND_ENCODING_SINGLE_QUOTES);
240 XMLs.put(XML5, XML_WITH_PROLOG_AND_ENCODING_SPACED_SINGLE_QUOTES);
241 }
242
243
244
245
246
247
248
249 protected InputStream getXmlStream(String bomType,String xmlType,String streamEnc,String prologEnc) throws IOException {
250 ByteArrayOutputStream baos = new ByteArrayOutputStream(1024);
251 int[] bom = (int[]) BOMs.get(bomType);
252 if (bom==null) {
253 bom = new int[0];
254 }
255 MessageFormat xml = (MessageFormat) XMLs.get(xmlType);
256 for (int i=0;i<bom.length;i++) {
257 baos.write(bom[i]);
258 }
259 Writer writer = new OutputStreamWriter(baos,streamEnc);
260 String info = INFO.format(new Object[]{bomType,xmlType,prologEnc});
261 String xmlDoc = xml.format(new Object[]{streamEnc,prologEnc,info});
262 writer.write(xmlDoc);
263
264
265 writer.write("<da>\n");
266 for (int i=0;i<10000;i++) {
267 writer.write("<do/>\n");
268 }
269 writer.write("</da>\n");
270
271 writer.close();
272 return new ByteArrayInputStream(baos.toByteArray());
273 }
274
275
276 }