1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 """A method of validating e-mail addresses and mail domains.
19
20 This module aims to provide the ultimate functions for:
21 * domain validation, and
22 * e-mail validation.
23
24 Why not just use a regular expression?
25 ======================================
26 http://haacked.com/archive/2007/08/21/i-knew-how-to-validate-an-email-address-until-i.aspx
27
28 There are many regular expressions out there for this. The "perfect one" is
29 several KB long and therefore unmaintainable (Perl people wrote it...).
30
31 This is 2009 and domain rules are changing too. Impossible domain names have
32 become possible, international domain names are real...
33
34 So validating an e-mail address is more complex than you might think. Take a
35 look at some of the rules:
36 http://en.wikipedia.org/wiki/E-mail_address#RFC_specification
37
38 How to do it then?
39 ==================
40 I believe the solution should combine simple regular expressions with
41 imperative programming.
42
43 E-mail validation is also dependent on the robustness principle:
44 "Be conservative in what you do, be liberal in what you accept from others."
45 http://en.wikipedia.org/wiki/Postel%27s_law
46
47 This module recognizes that e-mail validation can be done in several different
48 ways, according to purpose:
49
50 1) Most of the time you just want validation according to the standard rules.
51 So just say: v = EmailValidator()
52
53 2) If you are creating e-mail addresses for your server or your organization,
54 you might need to satisfy a stricter policy such as "dash is not allowed in
55 email addresses". The EmailValidator constructor accepts a *local_part_chars*
56 argument to help build the right regular expression for you.
Example (dash left out, so it is rejected): v = EmailValidator(local_part_chars='.+_')
58
59 3) What about typos? An erroneous dot at the end of a typed email is typical.
60 Other common errors with the dots revolve around the @: user@.domain.com.
61 These typing mistakes can be automatically corrected, saving you from doing
62 it manually. For this you use the *fix* flag when instantiating a validator:
63
64 d = DomainValidator(fix=True)
65 domain, error_message = d.validate('.supercalifragilistic.com.br')
66 if error_message:
67 print 'Invalid domain: ' + domain
68 else:
69 print 'Valid domain: ' + domain
70
71 4) TODO: Squash the bugs in this feature!
Paranoid people may wish to verify that the given domain actually exists.
73 For that you can pass a *lookup_dns='a'* argument to the constructor, or even
74 *lookup_dns='mx'* to verify that the domain actually has e-mail servers.
75 To use this feature, you need to install the *pydns* library:
76
77 easy_install -UZ pydns
78
79 How to use
80 ==========
81
82 The validating methods return a tuple (email, error_msg).
83 *email* is the trimmed and perhaps fixed email.
84 *error_msg* is an empty string when the e-mail is valid.
85
86 Typical usage is:
87
88 v = EmailValidator() # or EmailValidator(fix=True)
89 email = raw_input('Type an email: ')
90 email, err = v.validate(email)
91 if err:
92 print 'Error: ' + err
93 else:
94 print 'E-mail is valid: ' + email # the email, corrected
95
96 There is also an EmailHarvester class to collect e-mail addresses from any text.
97
98 Authors: Nando Florestan, Marco Ferreira
99 Code written in 2009 and donated to the public domain.
100 """
101
102
103 import re
104
105
# Names exported by ``from <module> import *``. DomainValidator and
# EmailHarvester are both presented as part of the API in the module
# docstring, so they are exported alongside the other validators.
__all__ = [
    'BaseValidator',
    'ValidationException',
    'DomainValidator',
    'EmailValidator',
    'EmailHarvester',
]
107
108
109
110
111
112
113
114
115
116
117
118
121
122
125 """Some people would condemn this whole module screaming:
126 "Don't return success codes, use exceptions!"
127 This method allows them to be happy, too.
128 """
129
130 validatee, err = self.validate(*a, **k)
131 if err:
132 raise ValidationException(err)
133 else:
134 return validatee
135
136
class DomainValidator(BaseValidator):
    """A domain name validator that is ready for internationalized domains.

    http://en.wikipedia.org/wiki/Internationalized_domain_name
    http://en.wikipedia.org/wiki/Top-level_domain

    The validating methods return a tuple ``(value, error_msg)``; *error_msg*
    is an empty string when the value is valid.
    """

    # One or more word characters, optionally followed by a run of word
    # characters, dots and dashes that ends in a word character.
    domain_pattern = r'[\w]+([\w\.\-]+\w)?'
    # Anchored version used for validation; re.UNICODE lets "\w" match
    # non-ASCII letters as well.
    domain_regex = \
        re.compile('^' + domain_pattern + '$', re.IGNORECASE | re.UNICODE)

    # An "A" lookup whose first answer is one of these addresses is reported
    # as "domain does not exist".
    # NOTE(review): presumably addresses that some resolvers hand out for
    # nonexistent domains -- confirm.
    false_positive_ips = ['208.67.217.132']

    def __init__(self, fix=False, lookup_dns=None):
        """Configure the validator.

        *fix*: when true, leading and trailing dots are stripped from the
        validated value instead of being reported as errors.

        *lookup_dns*: a false value (no DNS lookup), or 'a' / 'mx'
        (case-insensitive) to make validate_domain() also look the domain up.

        Raises RuntimeError for any other *lookup_dns* value.
        """
        self.fix = fix
        if lookup_dns:
            lookup_dns = lookup_dns.lower()
            if lookup_dns not in ('a', 'mx'):
                raise RuntimeError(
                    "Not a valid *lookup_dns* value: " + lookup_dns)
        self._lookup_dns = lookup_dns

    def _apply_common_rules(self, part, maxlength):
        """Apply the rules shared by the domain and the local part.

        Returns a tuple ``(part, error_msg)`` where *part* has surrounding
        whitespace removed (and, when *fix* is on, surrounding dots too) and
        *error_msg* is '' if every rule passed.
        """
        part = part.strip()
        if self.fix:
            part = part.strip('.')
        if not part:
            return part, 'It cannot be empty.'
        if len(part) > maxlength:
            return part, 'It cannot be longer than %i chars.' % maxlength
        if part[0] == '.':
            return part, 'It cannot start with a dot.'
        if part[-1] == '.':
            return part, 'It cannot end with a dot.'
        if '..' in part:
            return part, 'It cannot contain consecutive dots.'
        return part, ''

    def validate_domain(self, part):
        """Validate a domain name.

        Returns ``(domain, error_msg)``. On success the domain is returned
        lowercased and *error_msg* is ''. On failure the (stripped) input is
        returned together with a message describing the problem.
        """
        part, err = self._apply_common_rules(part, maxlength=255)
        if err:
            return part, 'Invalid domain: %s' % err
        if not self.domain_regex.search(part):
            return part, 'Invalid domain.'
        # The DNS lookup only happens when it was requested in the constructor.
        if self._lookup_dns and not self.lookup_domain(part):
            return part, 'Domain does not seem to exist.'
        return part.lower(), ''

    validate = validate_domain

    def lookup_domain(self, domain, lookup_record=None):
        """Looks up the DNS record for *domain* and returns:

        * None (or False, if the DNS reply could not be unpacked) when the
          domain does not seem to exist,
        * The data of the first answer if looking up the "A" record, or
        * The result of ``DNS.mxlookup`` if looking up the "MX" record.

        The return value, if treated as a boolean, says whether a domain exists.

        You can pass "a" or "mx" as the *lookup_record* parameter. Otherwise,
        the *lookup_dns* parameter from the constructor is used.
        "a" means verify that the domain exists.
        "mx" means verify that the domain exists and specifies mail servers.

        Raises RuntimeError when neither source yields "a" or "mx", and
        ImportError when the *pydns* library is not installed.
        """
        if lookup_record:
            lookup_record = lookup_record.lower()
        else:
            lookup_record = self._lookup_dns
        if lookup_record not in ('a', 'mx'):
            # "%s" instead of "+": lookup_record is None when nothing was
            # configured, and concatenating it would raise a TypeError.
            raise RuntimeError(
                "Not a valid lookup_record value: %s" % lookup_record)

        # pydns is an optional dependency that is only needed for lookups, so
        # it is imported on demand rather than at module import time.
        # NOTE(review): pydns may need its name servers discovered
        # (DNS.DiscoverNameServers()) before DNS.Request works -- confirm.
        import DNS

        if lookup_record == 'mx':
            return DNS.mxlookup(domain)

        request = DNS.Request(domain)
        try:
            answers = request.req().answers
        except DNS.Lib.PackError:
            # The reply could not be unpacked; report the domain as missing.
            return False
        if not answers:
            return None
        result = answers[0]['data']
        if result in self.false_positive_ips:
            return None
        return result
250
251
252
254
255
256
257
258
259
def __init__(self, local_part_chars=".-+_!#$%&'/=`|~?^{}*", **k):
    """Build the regular expression used to check the local part.

    *local_part_chars* lists the characters accepted in the local part in
    addition to ASCII letters and digits. Any other keyword argument is
    passed on unchanged to the parent constructor.
    """
    super(EmailValidator, self).__init__(**k)

    # The characters end up inside a character class, where an unescaped
    # dash would be read as a range; the dash is the only one escaped here.
    extra_chars = local_part_chars.replace('-', r'\-')
    self.local_part_pattern = '[a-z0-9' + extra_chars + ']+'

    # Same pattern, anchored so the whole local part has to match.
    anchored_pattern = '^' + self.local_part_pattern + '$'
    self.local_part_regex = re.compile(anchored_pattern, re.IGNORECASE)
268
270 part, err = self._apply_common_rules(part, maxlength=64)
271 if err:
272 return part, 'Invalid local part: %s' % err
273 if not self.local_part_regex.search(part):
274 return part, 'Invalid local part.'
275 return part, ''
276
277
279 if not email:
280 return email, 'The e-mail is empty.'
281 parts = email.split('@')
282 if len(parts) != 2:
283 return email, 'An email address must contain a single @'
284 local, domain = parts
285
286
287 domain, err = self.validate_domain(domain)
288 if err:
289 return email, \
290 "The e-mail has a problem to the right of the @: %s" % err
291
292 local, err = self.validate_local_part(local)
293 if err:
294 return email, \
295 "The email has a problem to the left of the @: %s" % err
296
297 return local + '@' + domain, ''
298
299 validate = validate_email
300
301
302
305 super(EmailHarvester, self).__init__(*a, **k)
306
307 self.harvest_regex = \
308 re.compile(self.local_part_pattern + '@' + self.domain_pattern,
309 re.IGNORECASE | re.UNICODE)
310
312 """Iterator that yields the e-mail addresses contained in *text*."""
313 for match in self.harvest_regex.finditer(text):
314
315
316 yield match.group().replace('..', '.')
317
318
319
if __name__ == '__main__':
    # Self-test followed by an interactive prompt. The assertions run first;
    # a failing one shows the validator's error message or the offending
    # input.
    d = DomainValidator()
    for candidate in (u'acentuação.com',
                      u'tld.ácçênts',
                      u'subdomain.subdomain.subdomain.sub.domain.tld'):
        domain, err = d.validate(candidate)
        assert not err, err
    domain, err = d.validate(u'.com')
    assert err

    v = EmailValidator()

    # Addresses that must be accepted.
    for candidate in (u'Dmitry.Shostakovich@great-music.com',
                      u'  ha@ha.ha  ',
                      u"a.a-a+a_a!a#a$a%a&a'a/a=a`a|a~a?a^a{a}"
                      u"a*a@special.chars",
                      u'user+mailbox@example.com',
                      u'customer/department=shipping@example.com',
                      u'$A12345@example.com',
                      u'!def!xyz%abc@example.com',
                      u'_somename@example.com'):
        email, err = v.validate(candidate)
        assert not err, err

    # Addresses that must be rejected.
    for candidate in (u'Abc.example.com',
                      u'A@b@example.com',
                      u'Abc.@example.com',
                      u'Abc..123@example.com',
                      u'ã@example.com',
                      u'\\@example.com'):  # a literal backslash before the @
        email, err = v.validate(candidate)
        assert err, candidate

    # Interactive loop; this validator also looks up the domain's "A" record.
    v = EmailValidator(lookup_dns='a')
    try:
        while True:
            email = raw_input(
                'Type an email or CTRL-C to quit: ').decode('utf8')
            email, err = v.validate(email)
            if err:
                print('Error: ' + err)
            else:
                print('E-mail is valid: ' + email)
    except (KeyboardInterrupt, EOFError):
        # The prompt advertises CTRL-C as the way out, so leave quietly
        # instead of ending with a traceback.
        print('')
372