Python高级编程–正则表达式（习题）-白红宇

Python高级编程–正则表达式（习题）

阅读量：6799 次

发布时间：2019-06-26

本文共 8089 字，大约阅读时间需要 26 分钟。

原文：http://start2join.me/python-regex-answer-20141030/

###########################################

######### Exercises for Chapter 15 ##########

###########################################

相关学习书籍：《Python核心编程》第十五章——正则表达式习题与答案。

正则表达式相关知识：

章节练习题及答案：

所有程序的首行都需要加入导入re模块的代码：

import re

#15-1:识别下列字符串：bat、bit、but、hat、hit、或hut。

# Exercise 15-1: recognise the words bat, bit, but, hat, hit and hut.
# The statements were fused onto one line (a SyntaxError); they are split
# back into separate statements here.
print('''#15-1:识别下列字符串：bat、bit、but、hat、hit、或hut。''')
strList = ['bat', 'bit', 'but', 'hat', 'hit', 'hut']
for c in strList:
    # First letter is b or h, the vowel is a, i or u, then a literal t.
    print(re.match(r'[bh][aiu]t', c).group())

#15-2:匹配用一个空格分隔的任意一对单词，比如名和姓。

# Exercise 15-2: match any pair of words separated by a single whitespace
# character, such as a first name and a last name.
# The statements were fused onto one line (a SyntaxError); they are split
# back into separate statements here.
print('''#15-2:匹配用一个空格分隔的任意一对单词，比如名和姓。''')
result = re.match(r'[A-Za-z]+\s[A-Za-z]+', 'Jim Green')
print(result.group())

#15-3:匹配用一个逗号和一个空格分开的一个单词和一个字母。例如英文人名中的姓和名的首字母。

# Exercise 15-3: match a word and a single letter separated by a comma and
# one space, e.g. a last name followed by a first initial ("Green, J").
# The previous pattern matched "J. Green" (initial, dot, surname), which is
# not the format the exercise describes.
print('''#15-3:匹配用一个逗号和一个空格分开的一个单词和一个字母。例如英文人名中的姓和名的首字母。''')
# \b after the letter ensures it really is a single letter, not the start
# of a longer word such as "Green, Jim".
pattern = r'[A-Za-z]+, [A-Za-z]\b'
result = re.match(pattern, 'Green, J')
print(result.group())

#15-4:匹配所有合法的Python标识符。注：字母下划线打头，后面接任意数字字母和下划线

# Exercise 15-4: match every legal Python identifier: a letter or an
# underscore first, followed by any number of letters, digits or underscores.
print('''#15-4:匹配所有合法的Python标识符。注：字母下划线打头，后面接任意数字字母和下划线''')
valueList = ['10_mys', '_myValue09', 'Post99',
             '*hahah', 'list@%']
# `*` rather than `+` so that one-character names such as "x" or "_" are
# accepted; \w already includes the underscore; \Z (unlike $) does not
# tolerate a trailing newline.
pattern = re.compile(r'[a-zA-Z_]\w*\Z')
for c in valueList:
    result = pattern.match(c)
    if result:
        print(c + ':' + result.group())
    else:
        print(c + ':Illegal...')

#15-6:匹配简单的以www.开头以.com结尾的web域名。例如www.baidu.com。附加题：使你写的表达式还支持其他的顶级域名。如：.edu, .net等。

# Exercise 15-6: match simple web domain names that start with "www." and
# end with ".com"; as a bonus, also accept other top-level domains.
print('''#15-6:匹配简单的以www.开头以.com结尾的web域名。例如www.baidu.com。附加题：使你写的表达式还支持其他的顶级域名。如：.edu, .net等。''')
comList = ['com', 'edu', 'net']
# The dots are escaped: an unescaped "." matches any character, so the old
# pattern also accepted strings such as "wwwXbaiduYcom".
comPattern = re.compile(r'www\.\w+\.com\Z')
result = comPattern.match('www.baidu.com')
print(result.group(0))
# Bonus: build the alternation from comList so only the listed top-level
# domains are accepted (previously comList was defined but never used).
tldPattern = re.compile(r'www\.\w+\.(?:%s)\Z' % '|'.join(comList))
result = tldPattern.match('www.yahoo.net')
if result:
    print(result.group(0))

#15-7:匹配全体Python整型的字符串表示形式的集合

# Exercise 15-7: match the string representations of Python integers.
print('''#15-7:匹配全体Python整形的字符串表示形式的集合''')
# Covers an optional sign, hexadecimal (0x..), binary (0b..), octal (0.. or
# 0o..) and decimal literals, with the optional Python 2 long suffix l/L.
# The previous pattern recognised unsigned decimals only.
pattern = re.compile(
    r'[-+]?(?:0[xX][0-9a-fA-F]+|0[bB][01]+|0[oO]?[0-7]+|[1-9]\d*|0)[lL]?\Z')
result = pattern.match('43545464')
print(result.group(0))

#15-8:匹配全体Python长整型的字符串表示形式的集合

# Exercise 15-8: match the string representations of Python long integers
# (Python 2 literals carrying a mandatory l/L suffix).
print('''#15-8:匹配全体Python长整形的字符串表示形式的集合''')
# Same integer forms as a plain int (sign, hex, binary, octal, decimal) but
# the suffix is required. The previous pattern handled unsigned decimals
# only.
pattern = re.compile(
    r'[-+]?(?:0[xX][0-9a-fA-F]+|0[bB][01]+|0[oO]?[0-7]+|[1-9]\d*|0)[lL]\Z')
result = pattern.match('43545464L')
print(result.group(0))

#15-9:匹配全体Python浮点型的字符串表示形式的集合

# Exercise 15-9: match the string representations of Python floats.
print('''#15-9:匹配全体Python浮点形的字符串表示形式的集合''')
# A float needs either a decimal point ("1.", ".5", "1.5", optionally with
# an exponent) or an exponent ("1e10"). The previous pattern made the dot
# optional, so plain integers such as "12" were accepted, while "1.", ".5"
# and exponent forms were rejected.
pattern = re.compile(
    r'[-+]?(?:(?:\d+\.\d*|\.\d+)(?:[eE][-+]?\d+)?|\d+[eE][-+]?\d+)\Z')
result = pattern.match('43545464.00001')
print(result.group(0))

#15-10:匹配全体Python复数型的字符串表示形式的集合

# Exercise 15-10: match the string representations of Python complex numbers.
print('''#15-10:匹配全体Python复数形的字符串表示形式的集合''')
# One unsigned real number: digits with an optional fraction, or a bare
# fraction, followed by an optional exponent.
number = r'(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?'
# Optional sign, optional real part followed by +/-, then the imaginary part
# with its j/J suffix. The previous pattern required at least two characters
# in each part, so "1+2j" and pure imaginaries such as "3j" were rejected.
pattern = re.compile(r'[-+]?(?:%s[-+])?%s[jJ]\Z' % (number, number))
result = pattern.match('4.5+0.1j')
print(result.group(0))

#15-11:匹配所有电子邮件的字符串表示形式的集合

# Exercise 15-11: match the string representations of e-mail addresses.
print('''#15-11:匹配所有电子邮件的字符串表示形式的集合''')
# The local part may contain dots, plus signs and hyphens; the host must
# have at least one dot and end in an alphabetic top-level domain. The
# previous pattern rejected "jim.green@host.com" and accepted "jim@host".
# This is still a practical approximation, not full RFC 5322 syntax.
pattern = re.compile(r'[\w.+-]+@(?:[\w-]+\.)+[A-Za-z]{2,}\Z')
result = pattern.match('jim@host.cool.com')
print(result.group(0))

#15-12:匹配所有web网站地址的字符串表示形式的集合

# Exercise 15-12: match the string representations of web site addresses.
print('''#15-12:匹配所有web网站地址的字符串表示形式的集合''')
# One or more dot-terminated labels followed by an alphabetic top-level
# domain. The previous pattern had no mandatory dot, so any run of two or
# more word characters was accepted, and it matched the sample host only
# through backtracking.
pattern = re.compile(r'(?:[\w-]+\.)+[A-Za-z]{2,}\Z')
result = pattern.match('www.host.cool.com')
print(result.group(0))

#15-13:匹配标准日历上的前9个月

# Exercise 15-13: match the first nine months of the calendar (1-9, with an
# optional leading zero).
print('''#15-13:匹配标准日历上的前9个月''')
# Anchored at the end so that "10", "11" and "12" are not accepted through
# their first digit.
pattern = re.compile(r'0?[1-9]\Z')
result = pattern.match('07')
print(result.group(0))

#15-14:匹配标准日历上的后三个月

# Exercise 15-14: match the last three months of the calendar (10, 11, 12).
print('''#15-14:匹配标准日历上的后三个月''')
# Anchored at the end so that longer digit strings such as "100" are not
# accepted through their first two digits.
pattern = re.compile(r'1[0-2]\Z')
result = pattern.match('11')
print(result.group(0))

#15-15:信用卡问题：15位信用卡的格式是4-6-5。16位的信用卡格式是4-4-4-4，位数不足则添0补充。

# Exercise 15-15: credit card numbers. 15-digit cards are grouped 4-6-5 and
# 16-digit cards are grouped 4-4-4-4.
print('''#15-15:信用卡问题：15位信用卡的格式是4-6-5。16位的信用卡格式是4-4-4-4，位数不足则添0补充。''')
# A single anchored pattern replaces the separate digit count plus
# unanchored match, which accepted trailing junk such as
# "1321-1544-6511-0001x".
cardPattern = re.compile(
    r'(?:\d{4}-\d{4}-\d{4}-\d{4}|\d{4}-\d{6}-\d{5})\Z')


def isValidCard(number):
    """Return True if *number* is laid out as 4-4-4-4 or 4-6-5 digit groups."""
    return cardPattern.match(number) is not None


cardNo = '1321-1544-6511-0001'
if isValidCard(cardNo):
    print('It is OK!')
else:
    print('It is Illegal...')

#15-16:修改getdata.py，将生成的数据存入redata.txt中。

# Exercise 15-16: generate random test records and store them in redata.txt.
# Each record has the form
#   <ctime string>::<login>@<domain>.<tld>::<epoch seconds>-<len(login)>-<len(domain)>
print('''#15-16:修改getdata.py，将生成的数据存入redate.txt中。''')
from random import randint, choice
import string
from time import ctime

lowercase = list(string.ascii_lowercase)
maxint = 23554545646
doms = ('com', 'edu', 'net', 'org', 'gov')
# The `with` block closes the file even if a write fails.
with open('redata.txt', 'w') as f:
    for i in range(randint(5, 10)):
        dtint = randint(0, maxint - 1)
        dtstr = ctime(dtint)
        shorter = randint(4, 7)
        em = ''.join(choice(lowercase) for j in range(shorter))
        longer = randint(shorter, 12)
        dn = ''.join(choice(lowercase) for j in range(longer))
        # Build the record once: previously choice(doms) was drawn separately
        # for the file and for the console, so the printed line could show a
        # different top-level domain from the one actually written.
        record = '%s::%s@%s.%s::%d-%d-%d' % (
            dtstr, em, dn, choice(doms), dtint, shorter, longer)
        f.write(record + '\n')
        print(record)

#15-17:统计redata.txt中各星期出现的次数。

# Exercise 15-17: count how many times each weekday occurs in redata.txt.
print('''#15-17:统计redata.txt中各星期出现的次数。''')
days = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
counts = dict.fromkeys(days, 0)
with open('redata.txt', 'r') as f:
    for line in f:
        # Each record starts with the three-letter weekday abbreviation.
        result = re.match(r'^[\w]{3}', line)
        # Blank or malformed lines are skipped instead of raising
        # AttributeError on a failed match.
        if result and result.group() in counts:
            counts[result.group()] += 1
# Keep the individual counters available under their original names.
# Previously a 'Sun' record incremented `fri`, so Sunday was always reported
# as 0 and Friday was over-counted.
mon, tue, wed, thu, fri, sat, sun = [counts[day] for day in days]
print('%s\t%s\t%s\t%s\t%s\t%s\t%s'
      % ('mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'))
print('%d\t%d\t%d\t%d\t%d\t%d\t%d\n'
      % (mon, tue, wed, thu, fri, sat, sun))

#15-18:通过正文部分的整型与时间戳进行比较，判断文件是否完整。

# Exercise 15-18: verify redata.txt by checking that the integer in the last
# field of each record converts (via ctime) to the timestamp at the start of
# the same record. `ctime` comes from the `from time import ctime` in the
# 15-16 script.
print('''#15-18:通过正文部分的整形与时间戳进行比较，判断文件是否完整。''')
with open('redata.txt', 'r') as f:
    for line in f:
        # Leading timestamp: everything up to the four-digit year.
        timeStr = re.match(r'.+\s\d{4}', line)
        # Epoch seconds: the digits after the last "::" and before "-".
        timeInt = re.match(r'.+::(\d+)-', line)
        expected = ctime(int(timeInt.group(1)))
        if expected != timeStr.group(0):
            print(expected)
            print('File is Error...')
            break

#15-19:提取每行中完整的时间戳字段。

# Exercise 15-19: extract the complete timestamp field from each record.
# The statements were fused onto one line (a SyntaxError); they are split
# back into separate statements here.
print('''#15-19:提取每行中完整的时间戳字段。''')
with open('redata.txt', 'r') as f:
    for line in f:
        # The timestamp runs from the start of the line through the
        # four-digit year.
        timeStr = re.match(r'.+\s\d{4}', line)
        print(timeStr.group(0))

#15-20:提取每行中完整的电子邮件字段。

# Exercise 15-20: extract the complete e-mail address from each record.
# The statements were fused onto one line (a SyntaxError); they are split
# back into separate statements here.
print('''#15-20:提取每行中完整的电子邮件字段。''')
with open('redata.txt', 'r') as f:
    for line in f:
        # The address is the middle field, delimited by "::" on both sides.
        mailStr = re.match(r'.+::(.+)::', line)
        print(mailStr.group(1))

#15-21:只提取每行中的月份。

# Exercise 15-21: extract only the month from each record.
# The statements were fused onto one line (a SyntaxError); they are split
# back into separate statements here, and the result variable is named
# after what it holds.
print('''#15-21:只提取每行中的月份。''')
with open('redata.txt', 'r') as f:
    for line in f:
        # The month is the three-character word enclosed by whitespace on
        # both sides (the weekday before it starts the line).
        monthStr = re.match(r'.+\s([\w]{3})\s', line)
        print(monthStr.group(1))

#15-22:只提取每行中的年份。

# Exercise 15-22: extract only the year from each record.
# The statements were fused onto one line (a SyntaxError); they are split
# back into separate statements here, and the result variable is named
# after what it holds.
print('''#15-22:只提取每行中的年份。''')
with open('redata.txt', 'r') as f:
    for line in f:
        # The year is the four-character word immediately before the "::"
        # that follows it.
        yearStr = re.match(r'.+\s([\w]{4})::', line)
        print(yearStr.group(1))

#15-23:只提取每行中的时间字段（HH:MM:SS）。

# Exercise 15-23: extract only the time of day (HH:MM:SS) from each record.
# The statements were fused onto one line (a SyntaxError); they are split
# back into separate statements here, and the result variable is named
# after what it holds.
print('''#15-23:只提取每行中的时间字段（HH:MM:SS）。''')
with open('redata.txt', 'r') as f:
    for line in f:
        # The time sits between the day of month ("digit, whitespace" before
        # it) and the year ("whitespace, digit" after it).
        clockStr = re.match(r'.+\d\s(.+)\s\d', line)
        print(clockStr.group(1))

#15-24:只提取电子邮件中登录名和域名(连接提取).

# Exercise 15-24: extract the login name and the domain name of the e-mail
# address as one joined string.
# The statements were fused onto one line (a SyntaxError); they are split
# back into separate statements here.
print('''#15-24:只提取电子邮件中登录名和域名(连接提取).''')
with open('redata.txt', 'r') as f:
    for line in f:
        # Captures from just after the "::" up to the last "." of the line,
        # i.e. "login@domain" with the final ".tld" left out.
        mailStr = re.match(r'.+::(.+)\.', line)
        print(mailStr.group(1))

#15-25:只提取电子邮件中登录名和域名(分别提取)。

# Exercise 15-25: extract the login name and the domain name of the e-mail
# address separately.
# The statements were fused onto one line (a SyntaxError); they are split
# back into separate statements here.
print('''#15-25:只提取电子邮件中登录名和域名(分别提取)。''')
with open('redata.txt', 'r') as f:
    for line in f:
        # Login: the word between "::" and "@".
        nameStr = re.match(r'.+::(\w+)@', line)
        # Domain: the word between "@" and the following ".".
        zoneStr = re.match(r'.+@(\w+)\.', line)
        print(nameStr.group(1) + '-->' + zoneStr.group(1))

#15-26:将每行的电子邮件替换为你自己的电子邮件地址。

# Exercise 15-26: replace the e-mail address in every record of redata.txt
# with a fixed address, rewrite the file, then print the result.
print('''#15-26:将每行的电子邮件替换为你自己的电子邮件地址。''')
lines = []
with open('redata.txt', 'r') as r:
    for line in r:
        nameStr = re.sub(r'\w+@\w+\.\w+', 'cf@host.com', line)
        lines.append(nameStr)
# The file is only written in this step, so plain 'w' is sufficient.
with open('redata.txt', 'w') as w:
    w.writelines(lines)
with open('redata.txt', 'r') as r:
    for line in r:
        # Drop the record's own newline: print() adds one, which previously
        # produced a blank line after every record.
        print(line.rstrip('\n'))

#15-27:提取出时间戳中的月日年，并按照’月日，年’显示出来(每行只遍历一次)。

# Exercise 15-27: extract month, day and year from the timestamp and show
# them as "Mon D, YYYY", using a single match per line.
# The statements were fused onto one line (a SyntaxError); they are split
# back into separate statements here.
print('''#15-27:提取出时间戳中的月日年，并按照'月 日， 年'显示出来(每行只遍历一次)。''')
with open('redata.txt', 'r') as f:
    for line in f:
        # Group 1: month name plus day of month (one or two separating
        # whitespace characters, one or two digits).
        # Group 2: the digits directly before the first ":" that follows a
        # whitespace-delimited field, which is the year before "::".
        dateStr = re.match(
            r'.+\s([\w]{3}[\s]{1,2}[\d]{1,2})\s.+\s(\d+):',
            line)
        print(dateStr.group(1) + ', ' + dateStr.group(2))

#15-28:提取电话号码3-3-4，要求区号是可选的(800-555-1212和555-1212都可以匹配)。

# Exercise 15-28: match 3-3-4 telephone numbers where the area code is
# optional (both 800-555-1212 and 555-1212 match).
# The statements were fused onto one line (a SyntaxError); they are split
# back into separate statements here.
print('''#15-28:提取电话号码3-3-4，要求区号是可选的。(800-555-1212和555-1212都可以匹配)。''')
numList = ['800-555-1212', '555-1212']
for nums in numList:
    # The optional group is the area code with its trailing hyphen.
    number = re.match(r'(\d{3}-)?\d{3}-\d{4}', nums)
    print(number.group())

#15-29:提取电话号码3-3-4，要求区号可以包含圆括号或是连字符，它们是可选的(800-555-1212、555-1212或(800)555-1212都可以匹配)。

# Exercise 15-29: match 3-3-4 telephone numbers where the optional area code
# may be written in parentheses or followed by a hyphen (800-555-1212,
# 555-1212 and (800)555-1212 all match).
# The statements were fused onto one line (a SyntaxError); they are split
# back into separate statements here.
print('''#15-29:提取电话号码3-3-4，要求区号可以包含园括号或是连字符，他们是可选的(800-555-1212、555-1212或(800)555-1212都可以匹配)。''')
numList = ['800-555-1212', '555-1212', '(800)555-1212']
for nums in numList:
    # Optional area code: either "(ddd)" or "ddd-".
    number = re.match(r'(\(\d{3}\)|(\d{3}-))?\d{3}-\d{4}', nums)
    print(number.group())

转载于:https://www.cnblogs.com/zhizhan/p/4542455.html

你可能感兴趣的文章

升级aws ec2主机配置

查看>>

CentOS 6.5 svn服务器1.0版

查看>>

RED7防火墙

查看>>

FreeNAS8 ISCSI target & initiator for linux/windows

查看>>

cisco 3560交换机和H3C S5120 链路聚合配置实例。

查看>>