2012-04-07 23:05:57 +02:00
|
|
|
# RFC 2822 - style email validation for Python
|
|
|
|
# (c) 2012 Syrus Akbary <me@syrusakbary.com>
|
|
|
|
# Extended from (c) 2011 Noel Bush <noel@aitools.org>
|
|
|
|
# for support of mx and user check
|
|
|
|
# This code is made available to you under the GNU LGPL v3.
|
|
|
|
#
|
|
|
|
# This module provides a single function, validate_email(),
|
|
|
|
# which returns True or False to indicate whether a given address
|
|
|
|
# is valid according to the 'addr-spec' part of the specification
|
|
|
|
# given in RFC 2822. Ideally, we would like to find this
|
|
|
|
# in some other library, already thoroughly tested and well-
|
|
|
|
# maintained. The standard Python library email.utils
|
|
|
|
# contains a parseaddr() function, but it is not sufficient
|
|
|
|
# to detect many malformed addresses.
|
|
|
|
#
|
|
|
|
# This implementation aims to be faithful to the RFC, with the
|
|
|
|
# exception of a circular definition (see comments below), and
|
|
|
|
# with the omission of the pattern components marked as "obsolete".
|
|
|
|
|
|
|
|
import re
|
|
|
|
import smtplib
|
2013-06-01 21:54:39 +02:00
|
|
|
import logging
|
2013-06-01 23:15:37 +02:00
|
|
|
import socket
|
2012-04-07 23:05:57 +02:00
|
|
|
|
2014-10-12 20:51:42 +02:00
|
|
|
# Python 2/3 compatibility: Python 3 has no built-in ``raw_input``.
try:
    raw_input  # noqa: B018 -- bare name lookup; raises NameError on Python 3
except NameError:
    def raw_input(prompt=''):
        """Read one line from standard input, like Python 2's ``raw_input``.

        Delegates to the built-in ``input`` with the given *prompt*.
        """
        return input(prompt)
|
|
|
|
|
2012-04-07 23:05:57 +02:00
|
|
|
# pyDNS is an optional dependency: it is only needed for the MX / SMTP
# checks.  When it cannot be imported, ``DNS`` is set to None and a local
# placeholder ``ServerError`` is defined so that ``except ServerError``
# clauses elsewhere in this module remain valid.
try:
    import DNS

    ServerError = DNS.ServerError
    # Kept inside the ``try`` so that an ImportError raised while
    # discovering name servers also falls back to ``DNS = None``.
    DNS.DiscoverNameServers()
except ImportError:
    DNS = None

    class ServerError(Exception):
        """Stand-in for ``DNS.ServerError`` when pyDNS is not installed."""
|
|
|
|
|
2012-04-07 23:05:57 +02:00
|
|
|
# All we are really doing is comparing the input string to one
|
|
|
|
# gigantic regular expression. But building that regexp, and
|
|
|
|
# ensuring its correctness, is made much easier by assembling it
|
|
|
|
# from the "tokens" defined by the RFC. Each of these tokens is
|
|
|
|
# tested in the accompanying unit test file.
|
|
|
|
#
|
|
|
|
# The section of RFC 2822 from which each pattern component is
|
|
|
|
# derived is given in an accompanying comment.
|
|
|
|
#
|
|
|
|
# (To make things simple, every string below is given as 'raw',
|
|
|
|
# even when it's not strictly necessary. This way we don't forget
|
|
|
|
# when it is necessary.)
|
|
|
|
#
|
|
|
|
# --- 2.2.2 / 2.2.3: white space and line folding ---------------------------
WSP = r'[ \t]'  # see 2.2.2. Structured Header Field Bodies
CRLF = r'(?:\r\n)'  # see 2.2.3. Long Header Fields

# --- 3.2.1. Primitive Tokens -----------------------------------------------
# NO-WS-CTL = %d1-8 / %d11 / %d12 / %d14-31 / %d127
# (the last range starts at 14 == \x0e, not \x0f).
NO_WS_CTL = r'\x01-\x08\x0b\x0c\x0e-\x1f\x7f'

# --- 3.2.2. Quoted characters ----------------------------------------------
QUOTED_PAIR = r'(?:\\.)'

# --- 3.2.3. Folding white space and comments -------------------------------
FWS = r'(?:(?:' + WSP + r'*' + CRLF + r')?' + WSP + r'+)'
CTEXT = r'[' + NO_WS_CTL + r'\x21-\x27\x2a-\x5b\x5d-\x7e]'
# NB: The RFC includes COMMENT in ccontent as well, but that would be
# circular, so nested comments are not supported.
CCONTENT = r'(?:' + CTEXT + r'|' + QUOTED_PAIR + r')'
COMMENT = r'\((?:' + FWS + r'?' + CCONTENT + r')*' + FWS + r'?\)'
CFWS = (
    r'(?:' + FWS + r'?' + COMMENT + r')*'
    r'(?:' + FWS + r'?' + COMMENT + r'|' + FWS + r')'
)

# --- 3.2.4. Atom -----------------------------------------------------------
ATEXT = r'[\w!#$%&\'\*\+\-/=\?\^`\{\|\}~]'
ATOM = CFWS + r'?' + ATEXT + r'+' + CFWS + r'?'
DOT_ATOM_TEXT = ATEXT + r'+(?:\.' + ATEXT + r'+)*'
DOT_ATOM = CFWS + r'?' + DOT_ATOM_TEXT + CFWS + r'?'

# --- 3.2.5. Quoted strings -------------------------------------------------
QTEXT = r'[' + NO_WS_CTL + r'\x21\x23-\x5b\x5d-\x7e]'
QCONTENT = r'(?:' + QTEXT + r'|' + QUOTED_PAIR + r')'
QUOTED_STRING = (
    CFWS + r'?' + r'"(?:' + FWS + r'?' + QCONTENT + r')*' + FWS + r'?' + r'"'
    + CFWS + r'?'
)

# --- 3.4.1. Addr-spec specification ----------------------------------------
LOCAL_PART = r'(?:' + DOT_ATOM + r'|' + QUOTED_STRING + r')'
DTEXT = r'[' + NO_WS_CTL + r'\x21-\x5a\x5e-\x7e]'
DCONTENT = r'(?:' + DTEXT + r'|' + QUOTED_PAIR + r')'
DOMAIN_LITERAL = (
    CFWS + r'?' + r'\[' + r'(?:' + FWS + r'?' + DCONTENT + r')*' + FWS + r'?\]'
    + CFWS + r'?'
)
DOMAIN = r'(?:' + DOT_ATOM + r'|' + DOMAIN_LITERAL + r')'
ADDR_SPEC = LOCAL_PART + r'@' + DOMAIN

# A valid address will match exactly the 3.4.1 addr-spec.
# The pattern is anchored with \Z rather than $: in Python, $ also matches
# just before a trailing newline, which would accept 'user@example.com\n'.
VALID_ADDRESS_REGEXP = '^' + ADDR_SPEC + r'\Z'
|
|
|
|
|
2014-01-07 12:51:50 +01:00
|
|
|
# Process-wide caches, never expired.
# MX_DNS_CACHE: hostname -> result of DNS.mxlookup(), or None for NXDOMAIN.
MX_DNS_CACHE = dict()
# MX_CHECK_CACHE: MX host -> True once an SMTP connection to it succeeded.
MX_CHECK_CACHE = dict()
|
2014-01-07 12:51:50 +01:00
|
|
|
|
|
|
|
|
|
|
|
def get_mx_ip(hostname):
    """Return the MX lookup result for *hostname*, using MX_DNS_CACHE.

    On a cache miss the result of ``DNS.mxlookup(hostname)`` is stored and
    returned.  A ``ServerError`` whose ``rcode`` is 3 (NXDOMAIN, i.e. the
    domain does not exist) is cached and returned as ``None``; any other
    ``ServerError`` is re-raised and nothing is cached.
    """
    try:
        return MX_DNS_CACHE[hostname]
    except KeyError:
        pass  # not looked up yet

    try:
        records = DNS.mxlookup(hostname)
    except ServerError as err:
        if err.rcode != 3:  # anything other than NXDOMAIN is a real failure
            raise
        records = None

    MX_DNS_CACHE[hostname] = records
    return records
|
|
|
|
|
2012-04-07 23:05:57 +02:00
|
|
|
|
2014-03-20 12:41:21 +01:00
|
|
|
def _quit_quietly(smtp):
    """Send SMTP QUIT, tolerating a server that already dropped the link."""
    try:
        smtp.quit()
    except smtplib.SMTPServerDisconnected:
        pass


def validate_email(email, check_mx=False, verify=False, debug=False, smtp_timeout=10):
    """Indicate whether the given string is a valid email address
    according to the 'addr-spec' portion of RFC 2822 (see section
    3.4.1).  Parts of the spec that are marked obsolete are *not*
    included in this test, and certain arcane constructions that
    depend on circular definitions in the spec may not pass, but in
    general this should correctly identify any email address likely
    to be in use as of 2011.

    Args:
        email: the address to validate.
        check_mx: also look up the domain's MX records and try to open an
            SMTP connection to one of them.
        verify: additionally ask the MX host (HELO / MAIL / RCPT) whether
            it accepts the address.  Implies ``check_mx``.
        debug: log diagnostic details to the 'validate_email' logger
            (its level is set to DEBUG).
        smtp_timeout: timeout in seconds passed to ``smtplib.SMTP``.

    Returns:
        ``True`` if the address is syntactically valid and every requested
        check succeeded; ``False`` if the syntax is invalid or the domain
        does not exist (NXDOMAIN); ``None`` if the requested check could
        not reach a conclusion (no MX host accepted, DNS or socket error).

    Raises:
        Exception: if an MX/verify check is requested but pyDNS is not
            installed.
    """
    if debug:
        logger = logging.getLogger('validate_email')
        logger.setLevel(logging.DEBUG)
    else:
        logger = None

    # Explicit test rather than ``assert``: assertions are stripped when
    # Python runs with -O, which would let every string pass as valid.
    if re.match(VALID_ADDRESS_REGEXP, email) is None:
        return False

    if not (check_mx or verify):
        return True

    if not DNS:
        raise Exception('For check the mx records or check if the email exists you must '
                        'have installed pyDNS python package')

    # Split on the LAST '@': a quoted local part may itself contain '@'
    # (e.g. "a@b"@example.com).
    hostname = email.rpartition('@')[2]

    try:
        mx_hosts = get_mx_ip(hostname)
        if mx_hosts is None:  # NXDOMAIN
            return False

        for mx in mx_hosts:
            mx_host = mx[1]
            if not verify and mx_host in MX_CHECK_CACHE:
                return MX_CHECK_CACHE[mx_host]

            smtp = smtplib.SMTP(timeout=smtp_timeout)
            try:
                smtp.connect(mx_host)
                MX_CHECK_CACHE[mx_host] = True
                if not verify:
                    _quit_quietly(smtp)
                    return True

                status, reply = smtp.helo()
                if status != 250:
                    _quit_quietly(smtp)
                    if debug:
                        logger.debug(u'%s answer: %s - %s', mx_host, status, reply)
                    continue

                smtp.mail('')
                status, reply = smtp.rcpt(email)
                if status == 250:
                    # A dropped connection on QUIT must not discard the
                    # positive answer we already received.
                    _quit_quietly(smtp)
                    return True
                if debug:
                    logger.debug(u'%s answer: %s - %s', mx_host, status, reply)
                _quit_quietly(smtp)
            except smtplib.SMTPServerDisconnected:  # Server not permits verify user
                if debug:
                    logger.debug(u'%s disconected.', mx_host)
            except smtplib.SMTPConnectError:
                if debug:
                    logger.debug(u'Unable to connect to %s.', mx_host)
            finally:
                # Release the socket on every path; a no-op after quit().
                smtp.close()
    except (ServerError, socket.error) as e:
        if debug:
            logger.debug('ServerError or socket.error exception raised (%s).', e)
        return None

    # No MX host gave a definitive answer.
    return None
|
|
|
|
|
2013-06-01 23:15:37 +02:00
|
|
|
# Interactive driver: repeatedly prompt for an address and the checks to
# run, then print the verdict.  Runs until interrupted.
if __name__ == "__main__":
    import time

    while True:
        email = raw_input('Enter email for validation: ')

        # Anything other than 'y' (case-insensitive, surrounding blanks
        # ignored) counts as "no".
        mx_answer = raw_input('Validate MX record? [yN] ')
        mx = mx_answer.strip().lower() == 'y'

        verify_answer = raw_input('Try to contact server for address validation? [yN] ')
        validate = verify_answer.strip().lower() == 'y'

        logging.basicConfig()

        result = validate_email(email, mx, validate, debug=True, smtp_timeout=1)
        if result is None:
            # The checks could not reach a conclusion.
            print("I'm not sure.")
        elif result:
            print("Valid!")
        else:
            print("Invalid!")

        time.sleep(1)
|
|
|
|
|
|
|
|
|
2012-04-07 23:05:57 +02:00
|
|
|
# import sys
|
|
|
|
|
|
|
|
# sys.modules[__name__],sys.modules['validate_email_module'] = validate_email,sys.modules[__name__]
|
|
|
|
# from validate_email_module import *
|