1
2
3 from translate.convert import html2po
4 from translate.convert import po2html
5 from translate.convert import test_convert
6 from translate.misc import wStringIO
7
10 """Helper to convert html to po without a file."""
11 inputfile = wStringIO.StringIO(markup)
12 convertor = html2po.html2po()
13 outputpo = convertor.convertfile(inputfile, "test", False, False)
14 return outputpo
15
16 - def po2html(self, posource, htmltemplate):
23
25 """helper to check that we got the expected number of messages, not counting a leading header unit"""
26 actual = len(pofile.units)
27 if actual > 0:
28 if pofile.units[0].isheader():
29 actual = actual - 1
30 print pofile
31 assert actual == expected
32
34 """helper to validate the source text of a PO message; unitnumber is 1-based and does not count the header"""
35 if not pofile.units[0].isheader():
36 unitnumber = unitnumber - 1
37 print 'unit source: ' + str(pofile.units[unitnumber].source) + '|'
38 print 'expected: ' + expected.encode('utf-8') + '|'
39 assert unicode(pofile.units[unitnumber].source) == unicode(expected)
40
46
51
53 """test to ensure that we no longer use the lang attribute"""
54 markup = '''<html lang="en"><head><title>My title</title></head><body></body></html>'''
55 pofile = self.html2po(markup)
56 self.countunits(pofile, 1)
57
58 self.compareunit(pofile, 1, "My title")
59
61 """test that we can extract the <title> tag"""
62 self.check_single("<html><head><title>My title</title></head><body></body></html>", "My title")
63
65 """Test a linebreak in the <title> tag"""
66 htmltext = '''<html>
67 <head>
68 <title>My
69 title</title>
70 </head>
71 <body>
72 </body>
73 </html>
74 '''
75 self.check_single(htmltext, "My title")
76
80
82 """test that we can extract the <p> tag"""
83 self.check_single("<html><head></head><body><p>A paragraph.</p></body></html>", "A paragraph.")
84 markup = "<p>First line.<br>Second line.</p>"
85 pofile = self.html2po(markup)
86 self.compareunit(pofile, 1, "First line.<br>Second line.")
87
89 """Test newlines within the <p> tag."""
90 htmltext = '''<html>
91 <head>
92 </head>
93 <body>
94 <p>
95 A paragraph is a section in a piece of writing, usually highlighting a
96 particular point or topic. It always begins on a new line and usually
97 with indentation, and it consists of at least one sentence.
98 </p>
99 </body>
100 </html>
101 '''
102 self.check_single(htmltext, "A paragraph is a section in a piece of writing, usually highlighting a particular point or topic. It always begins on a new line and usually with indentation, and it consists of at least one sentence.")
103 markup = "<p>First\nline.<br>Second\nline.</p>"
104 pofile = self.html2po(markup)
105 self.compareunit(pofile, 1, "First line.<br>Second line.")
106
108 """test that we can extract the <div> tag"""
109 self.check_single("<html><head></head><body><div>A paragraph.</div></body></html>", "A paragraph.")
110 markup = "<div>First line.<br>Second line.</div>"
111 pofile = self.html2po(markup)
112 self.compareunit(pofile, 1, "First line.<br>Second line.")
113
115 """Test linebreaks within a <div> tag."""
116 htmltext = '''<html>
117 <head>
118 </head>
119 <body>
120 <div>
121 A paragraph is a section in a piece of writing, usually highlighting a
122 particular point or topic. It always begins on a new line and usually
123 with indentation, and it consists of at least one sentence.
124 </div>
125 </body>
126 </html>
127 '''
128 self.check_single(htmltext, "A paragraph is a section in a piece of writing, usually highlighting a particular point or topic. It always begins on a new line and usually with indentation, and it consists of at least one sentence.")
129 markup = "<div>First\nline.<br>Second\nline.</div>"
130 pofile = self.html2po(markup)
131 self.compareunit(pofile, 1, "First line.<br>Second line.")
132
134 """test that we can extract the <a> tag"""
135 self.check_single('<html><head></head><body><p>A paragraph with <a href="http://translate.org.za/">hyperlink</a>.</p></body></html>', 'A paragraph with <a href="http://translate.org.za/">hyperlink</a>.')
136
138 """Test that we can extract the <a> tag with newlines in it."""
139 htmltext = '''<html>
140 <head>
141 </head>
142 <body>
143 <p>A
144 paragraph
145 with <a
146 href="http://translate.org.za/">hyperlink</a>
147 and
148 newlines.</p></body></html>
149 '''
150 self.check_single(htmltext, 'A paragraph with <a href="http://translate.org.za/">hyperlink</a> and newlines.')
151
153 """Test that we can extract the alt attribute from the <img> tag."""
154 self.check_single('''<html><head></head><body><img src="picture.png" alt="A picture"></body></html>''', "A picture")
155
157 """Test that an <img> tag without an alt attribute is ignored."""
158 htmlsource = '''<html><head></head><body><img src="images/topbar.jpg" width="750" height="80"></body></html>'''
159 self.check_null(htmlsource)
160
162 """Test that we can extract the summary attribute."""
163 self.check_single( '''<html><head></head><body><table summary="Table summary"></table></body></html>''', "Table summary")
164
174
176 markup = '''<table summary="This is the summary"><caption>A caption</caption><thead><tr><th abbr="Head 1">Heading One</th><th>Heading Two</th></thead><tfoot><tr><td>Foot One</td><td>Foot Two</td></tr></tfoot><tbody><tr><td>One</td><td>Two</td></tr></tbody></table>'''
177 pofile = self.html2po(markup)
178 self.countunits(pofile, 9)
179 self.compareunit(pofile, 1, "This is the summary")
180 self.compareunit(pofile, 2, "A caption")
181 self.compareunit(pofile, 3, "Head 1")
182 self.compareunit(pofile, 4, "Heading One")
183 self.compareunit(pofile, 5, "Heading Two")
184 self.compareunit(pofile, 6, "Foot One")
185 self.compareunit(pofile, 7, "Foot Two")
186 self.compareunit(pofile, 8, "One")
187 self.compareunit(pofile, 9, "Two")
188
190 """Test that we ignore tables that are empty.
191
192 A table is deemed empty if it has no translatable content.
193 """
194
195 self.check_null('''<html><head></head><body><table><tr><td><img src="bob.png"></td></tr></table></body></html>''')
196 self.check_null('''<html><head></head><body><table><tr><td> </td></tr></table></body></html>''')
197 self.check_null('''<html><head></head><body><table><tr><td><strong></strong></td></tr></table></body></html>''')
198
200 """Test to see if the address element is extracted"""
201 self.check_single("<body><address>My address</address></body>", "My address")
202
204 """Test to see if the h* elements are extracted"""
205 markup = "<html><head></head><body><h1>Heading One</h1><h2>Heading Two</h2><h3>Heading Three</h3><h4>Heading Four</h4><h5>Heading Five</h5><h6>Heading Six</h6></body></html>"
206 pofile = self.html2po(markup)
207 self.countunits(pofile, 6)
208 self.compareunit(pofile, 1, "Heading One")
209 self.compareunit(pofile, 2, "Heading Two")
210 self.compareunit(pofile, 3, "Heading Three")
211 self.compareunit(pofile, 4, "Heading Four")
212 self.compareunit(pofile, 5, "Heading Five")
213 self.compareunit(pofile, 6, "Heading Six")
214
216 """Test to see if h* elements with newlines can be extracted"""
217 markup = "<html><head></head><body><h1>Heading\nOne</h1><h2>Heading\nTwo</h2><h3>Heading\nThree</h3><h4>Heading\nFour</h4><h5>Heading\nFive</h5><h6>Heading\nSix</h6></body></html>"
218 pofile = self.html2po(markup)
219 self.countunits(pofile, 6)
220 self.compareunit(pofile, 1, "Heading One")
221 self.compareunit(pofile, 2, "Heading Two")
222 self.compareunit(pofile, 3, "Heading Three")
223 self.compareunit(pofile, 4, "Heading Four")
224 self.compareunit(pofile, 5, "Heading Five")
225 self.compareunit(pofile, 6, "Heading Six")
226
228 """Test to see if the definition list title (dt) element is extracted"""
229 self.check_single("<html><head></head><body><dl><dt>Definition List Item Title</dt></dl></body></html>", "Definition List Item Title")
230
232 """Test to see if the definition list description (dd) element is extracted"""
233 self.check_single("<html><head></head><body><dl><dd>Definition List Item Description</dd></dl></body></html>", "Definition List Item Description")
234
236 """test to check that we don't double extract a span item"""
237 self.check_single("<html><head></head><body><p>You are a <span>Spanish</span> sentence.</p></body></html>", "You are a <span>Spanish</span> sentence.")
238
248
250 """check that we use the default style of msgid_comments to disambiguate duplicate messages"""
251 markup = "<html><head></head><body><p>Duplicate</p><p>Duplicate</p></body></html>"
252 pofile = self.html2po(markup)
253 self.countunits(pofile, 2)
254
255 self.compareunit(pofile, 1, "Duplicate")
256 self.compareunit(pofile, 2, "Duplicate")
257
259 """check that we reflow multiline content to make it more readable for translators"""
260 self.check_single('''<td valign="middle" width="96%"><font class="headingwhite">South
261 Africa</font></td>''', '''<font class="headingwhite">South Africa</font>''')
262
270
272 """Remove carriage returns from files in dos format."""
273 htmlsource = '''<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">\r
274 <html><!-- InstanceBegin template="/Templates/masterpage.dwt" codeOutsideHTMLIsLocked="false" -->\r
275 <head>\r
276 <!-- InstanceBeginEditable name="doctitle" -->\r
277 <link href="fmfi.css" rel="stylesheet" type="text/css">\r
278 </head>\r
279 \r
280 <body>\r
281 <p>The rapid expansion of telecommunications infrastructure in recent\r
282 years has helped to bridge the digital divide to a limited extent.</p> \r
283 </body>\r
284 <!-- InstanceEnd --></html>\r
285 '''
286
287 self.check_single(htmlsource, 'The rapid expansion of telecommunications infrastructure in recent years has helped to bridge the digital divide to a limited extent.')
288
290 """Convert HTML input in iso-8859-1 correctly to unicode."""
291 htmlsource = '''<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
292 <html><!-- InstanceBegin template="/Templates/masterpage.dwt" codeOutsideHTMLIsLocked="false" -->
293 <head>
294 <!-- InstanceBeginEditable name="doctitle" -->
295 <title>FMFI - South Africa - CSIR Openphone - Overview</title>
296 <!-- InstanceEndEditable -->
297 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
298 <meta name="keywords" content="fmfi, first mile, first inch, wireless, rural development, access devices, mobile devices, wifi, connectivity, rural connectivty, ict, low cost, cheap, digital divide, csir, idrc, community">
299
300 <!-- InstanceBeginEditable name="head" -->
301 <!-- InstanceEndEditable -->
302 <link href="../../../fmfi.css" rel="stylesheet" type="text/css">
303 </head>
304
305 <body>
306 <p>We aim to please \x96 will you aim too, please?</p>
307 <p>South Africa\x92s language diversity can be challenging.</p>
308 </body>
309 </html>
310 '''
311 pofile = self.html2po(htmlsource)
312
313 self.countunits(pofile, 4)
314 self.compareunit(pofile, 3, u'We aim to please \x96 will you aim too, please?')
315 self.compareunit(pofile, 4, u'South Africa\x92s language diversity can be challenging.')
316
318 """Ensure that unnecessary html is stripped from the resulting unit."""
319
320 htmlsource = '''<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
321 <html>
322 <head>
323 <title>FMFI - Contact</title>
324 </head>
325 <body>
326 <table width="100%" border="0" cellpadding="0" cellspacing="0">
327 <tr align="left" valign="top">
328 <td width="150" height="556">
329 <table width="157" height="100%" border="0" cellspacing="0" id="leftmenubg-color">
330 <tr>
331 <td align="left" valign="top" height="555">
332 <table width="100%" border="0" cellspacing="0" cellpadding="2">
333 <tr align="left" valign="top" bgcolor="#660000">
334 <td width="4%"><strong></strong></td>
335 <td width="96%"><strong><font class="headingwhite">Projects</font></strong></td>
336 </tr>
337 <tr align="left" valign="top">
338 <td valign="middle" width="4%"><img src="images/arrow.gif" width="8" height="8"></td>
339 <td width="96%"><a href="index.html">Home Page</a></td>
340 </tr>
341 </table>
342 </td>
343 </tr>
344 </table></td>
345 </table>
346 </body>
347 </html>
348 '''
349 pofile = self.html2po(htmlsource)
350 self.countunits(pofile, 3)
351 self.compareunit(pofile, 2, u'Projects')
352 self.compareunit(pofile, 3, u'Home Page')
353
354
355 pofile.units[1].target = 'Projekte'
356 pofile.units[2].target = 'Tuisblad'
357 htmlresult = self.po2html(str(pofile), htmlsource).replace('\n', ' ').replace('= "', '="').replace('> <', '><')
358 snippet = '<td width="96%"><strong><font class="headingwhite">Projekte</font></strong></td>'
359 assert snippet in htmlresult
360 snippet = '<td width="96%"><a href="index.html">Tuisblad</a></td>'
361 assert snippet in htmlresult
362
364 """Tests running actual html2po commands on files"""
365 convertmodule = html2po
366 defaultoptions = {"progress": "none"}
367
374