1   /*
2    * ====================================================================
3    *
4    *  Copyright 1999-2004 The Apache Software Foundation
5    *
6    *  Licensed under the Apache License, Version 2.0 (the "License");
7    *  you may not use this file except in compliance with the License.
8    *  You may obtain a copy of the License at
9    *
10   *      http://www.apache.org/licenses/LICENSE-2.0
11   *
12   *  Unless required by applicable law or agreed to in writing, software
13   *  distributed under the License is distributed on an "AS IS" BASIS,
14   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   *  See the License for the specific language governing permissions and
16   *  limitations under the License.
17   * ====================================================================
18   *
19   * This software consists of voluntary contributions made by many
20   * individuals on behalf of the Apache Software Foundation.  For more
21   * information on the Apache Software Foundation, please see
22   * <http://www.apache.org/>.
23   *
24   * [Additional notices, if required by prior licensing conditions]
25   *
26   */
27  
28  package org.apache.commons.httpclient;
29  
30  import junit.framework.Test;
31  import junit.framework.TestCase;
32  import junit.framework.TestSuite;
33  import java.io.IOException;
34  import java.io.InputStream;
35  import java.util.HashMap;
36  import java.util.Map;
37  import java.util.StringTokenizer;
38  
39  import org.apache.commons.httpclient.methods.GetMethod;
40  import org.apache.commons.httpclient.methods.PostMethod;
41  import org.apache.commons.httpclient.util.URIUtil;
42  
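/**
 * Tests the character encoding applied by HTTP methods to request bodies,
 * query strings and url-encoded parameters, and the charset reported for
 * requests and responses.
 *
 * @author <a href="mailto:oleg@ural.ru">Oleg Kalnichevski</a>
 * @author <a href="mailto:ajmas@bigfoot.com">Andre John Mas</a>
 * @author <a href="mailto:laura@lwerner.org">Laura Werner</a>
 */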
48  
49  public class TestMethodCharEncoding extends TestCase {
50  
51      static final String CHARSET_DEFAULT = HttpConstants.DEFAULT_CONTENT_CHARSET;
52      static final String CHARSET_ASCII = "US-ASCII";
53      static final String CHARSET_UTF8 = "UTF-8";
54      static final String CHARSET_KOI8_R = "KOI8_R";
55      static final String CHARSET_WIN1251 = "Cp1251";
56  
57      static final int SWISS_GERMAN_STUFF_UNICODE [] = {
58          0x47, 0x72, 0xFC, 0x65, 0x7A, 0x69, 0x5F, 0x7A, 0xE4, 0x6D, 0xE4
59      };
60      
61      static final int SWISS_GERMAN_STUFF_ISO8859_1 [] = {
62          0x47, 0x72, 0xFC, 0x65, 0x7A, 0x69, 0x5F, 0x7A, 0xE4, 0x6D, 0xE4
63      };
64      
65      static final int SWISS_GERMAN_STUFF_UTF8 [] = {
66          0x47, 0x72, 0xC3, 0xBC, 0x65, 0x7A, 0x69, 0x5F, 0x7A, 0xC3, 0xA4,
67          0x6D, 0xC3, 0xA4
68      };
69  
70      static final int RUSSIAN_STUFF_UNICODE [] = {
71          0x412, 0x441, 0x435, 0x43C, 0x5F, 0x43F, 0x440, 0x438, 
72          0x432, 0x435, 0x442 
73      }; 
74  
75      static final int RUSSIAN_STUFF_UTF8 [] = {
76          0xD0, 0x92, 0xD1, 0x81, 0xD0, 0xB5, 0xD0, 0xBC, 0x5F, 
77          0xD0, 0xBF, 0xD1, 0x80, 0xD0, 0xB8, 0xD0, 0xB2, 0xD0, 
78          0xB5, 0xD1, 0x82
79      };
80  
81      static final int RUSSIAN_STUFF_KOI8R [] = {
82          0xF7, 0xD3, 0xC5, 0xCD, 0x5F, 0xD0, 0xD2, 0xC9, 0xD7, 
83          0xC5, 0xD4
84      };
85  
86      static final int RUSSIAN_STUFF_WIN1251 [] = {
87          0xC2, 0xF1, 0xE5, 0xEC, 0x5F, 0xEF, 0xF0, 0xE8, 0xE2, 
88          0xE5, 0xF2
89      };
90  
91      // ------------------------------------------------------------ Constructor
92  
93      public TestMethodCharEncoding(String testName) {
94          super(testName);
95      }
96  
97      // ------------------------------------------------------- TestCase Methods
98  
99      public static Test suite() {
100         return new TestSuite(TestMethodCharEncoding.class);
101     }
102 
103     // ----------------------------------------------------------------- Tests
104 
105 
106     public void testRequestCharEncoding() throws IOException {
107         
108         GetMethod httpget = new GetMethod("/");
109         assertEquals(CHARSET_DEFAULT, httpget.getRequestCharSet());
110         httpget.setRequestHeader("Content-Type", "text/plain; charset=" + CHARSET_ASCII); 
111         assertEquals(CHARSET_ASCII, httpget.getRequestCharSet());
112         httpget.setRequestHeader("Content-Type", "text/plain; charset=" + CHARSET_UTF8); 
113         assertEquals(CHARSET_UTF8, httpget.getRequestCharSet());
114         
115     }
116 
117     public void testResponseCharEncoding() throws IOException {
118         
119         SimpleHttpConnection conn = new SimpleHttpConnection();
120         String body = "stuff";
121         String headers1 = "HTTP/1.1 200 OK\r\n"
122                        +"Content-Length: 4\r\n";
123         conn.addResponse(headers1, body);
124         conn.open();
125         GetMethod httpget = new GetMethod("/");
126         httpget.execute(new HttpState(), conn);
127         assertEquals(CHARSET_DEFAULT, httpget.getResponseCharSet());
128         conn.close();
129         httpget.recycle();
130         
131         String headers2 = "HTTP/1.1 200 OK\r\n"
132                        +"Content-Type: text/plain\r\n"
133                        +"Content-Length: 4\r\n";
134         conn.addResponse(headers2, body);
135         conn.open();
136         httpget.setPath("/");
137         httpget.execute(new HttpState(), conn);
138         assertEquals(CHARSET_DEFAULT, httpget.getResponseCharSet());
139         conn.close();
140         httpget.recycle();
141 
142         String headers3 = "HTTP/1.1 200 OK\r\n"
143                        +"Content-Type: text/plain; charset=" + CHARSET_UTF8 + "\r\n"
144                        +"Content-Length: 4\r\n";
145         conn.addResponse(headers3, body);
146         conn.open();
147         httpget.setPath("/");
148         httpget.execute(new HttpState(), conn);
149         assertEquals(CHARSET_UTF8, httpget.getResponseCharSet());
150         conn.close();
151         httpget.recycle();
152 
153     }
154 
155 
156     private String constructString(int [] unicodeChars) {
157         StringBuffer buffer = new StringBuffer();
158         if (unicodeChars != null) {
159             for (int i = 0; i < unicodeChars.length; i++) {
160                 buffer.append((char)unicodeChars[i]); 
161             }
162         }
163         return buffer.toString();
164     }
165 
166 
167     private void verifyEncoding(final InputStream instream, final int[] sample)
168      throws IOException  {
169         assertNotNull("Request body", instream);
170         
171         for (int i = 0; i < sample.length; i++) {
172             int b = instream.read();
173             assertTrue("Unexpected end of stream", b != -1);
174             if (sample[i] != b) {
175                 fail("Invalid request body encoding");
176             }
177         }
178         assertTrue("End of stream expected", instream.read() == -1);
179     }
180     
181     
182     public void testLatinAccentInRequestBody() throws IOException {
183 
184         PostMethod httppost = new PostMethod("/");
185         httppost.setRequestBody(constructString(SWISS_GERMAN_STUFF_UNICODE));
186         // Test default encoding ISO-8859-1
187         verifyEncoding(httppost.getRequestBody(), SWISS_GERMAN_STUFF_ISO8859_1);
188         // Test UTF-8 encoding
189         httppost.setRequestHeader("Content-Type", "text/plain; charset=" + CHARSET_UTF8);
190         verifyEncoding(httppost.getRequestBody(), SWISS_GERMAN_STUFF_UTF8);
191 
192     }
193     
194     public void testRussianInRequestBody() throws IOException {
195 
196         PostMethod httppost = new PostMethod("/");
197         httppost.setRequestBody(constructString(RUSSIAN_STUFF_UNICODE));
198 
199         // Test UTF-8 encoding
200         httppost.setRequestHeader("Content-Type", "text/plain; charset=" + CHARSET_UTF8);
201         verifyEncoding(httppost.getRequestBody(), RUSSIAN_STUFF_UTF8);
202         // Test KOI8-R
203         httppost.setRequestHeader("Content-Type", "text/plain; charset=" + CHARSET_KOI8_R);
204         verifyEncoding(httppost.getRequestBody(), RUSSIAN_STUFF_KOI8R);
205         // Test WIN1251
206         httppost.setRequestHeader("Content-Type", "text/plain; charset=" + CHARSET_WIN1251);
207         verifyEncoding(httppost.getRequestBody(), RUSSIAN_STUFF_WIN1251);
208 
209     }
210 
211     public void testQueryParams() throws IOException {
212 
213         GetMethod get = new GetMethod("/");
214 
215         String ru_msg = constructString(RUSSIAN_STUFF_UNICODE); 
216         String ch_msg = constructString(SWISS_GERMAN_STUFF_UNICODE); 
217 
218         get.setQueryString(new NameValuePair[] {
219             new NameValuePair("ru", ru_msg),
220             new NameValuePair("ch", ch_msg) 
221         });            
222 
223         Map params = new HashMap();
224         StringTokenizer tokenizer = new StringTokenizer(
225             get.getQueryString(), "&");
226         while (tokenizer.hasMoreTokens()) {
227             String s = tokenizer.nextToken();
228             int i = s.indexOf('=');
229             assertTrue("Invalid url-encoded parameters", i != -1);
230             String name = s.substring(0, i).trim(); 
231             String value = s.substring(i + 1, s.length()).trim(); 
232             value = URIUtil.decode(value, CHARSET_UTF8);
233             params.put(name, value);
234         }
235         assertEquals(ru_msg, params.get("ru"));
236         assertEquals(ch_msg, params.get("ch"));
237     }
238 
239     public void testUrlEncodedRequestBody() throws IOException {
240 
241         PostMethod httppost = new PostMethod("/");
242 
243         String ru_msg = constructString(RUSSIAN_STUFF_UNICODE); 
244         String ch_msg = constructString(SWISS_GERMAN_STUFF_UNICODE); 
245 
246         httppost.setRequestBody(new NameValuePair[] {
247             new NameValuePair("ru", ru_msg),
248             new NameValuePair("ch", ch_msg) 
249         });            
250 
251         httppost.setRequestHeader("Content-Type", PostMethod.FORM_URL_ENCODED_CONTENT_TYPE 
252             + "; charset=" + CHARSET_UTF8);
253 
254         Map params = new HashMap();
255         StringTokenizer tokenizer = new StringTokenizer(
256         httppost.getRequestBodyAsString(), "&");
257         while (tokenizer.hasMoreTokens()) {
258             String s = tokenizer.nextToken();
259             int i = s.indexOf('=');
260             assertTrue("Invalid url-encoded parameters", i != -1);
261             String name = s.substring(0, i).trim(); 
262             String value = s.substring(i + 1, s.length()).trim(); 
263             value = URIUtil.decode(value, CHARSET_UTF8);
264             params.put(name, value);
265         }
266         assertEquals(ru_msg, params.get("ru"));
267         assertEquals(ch_msg, params.get("ch"));
268     }
269     
270 }