有如下数据,需要把 . 去掉
# -*- coding: utf-8 -*-
"""Remove the dot from codes shaped like ``DDDDDDDD.DD`` at the start of a line.

Created on Mon Sep 25 20:47:33
@author: Don
"""
import re

# Eight digits, a literal dot, then two digits. The dot is escaped so that it
# matches only "." and not an arbitrary character.
CODE_PATTERN = re.compile(r'\d{8}\.\d{2}')


def strip_code_dot(line):
    """Return *line* with the dot removed from its leading code.

    The code must sit at the very beginning of the line (``re.match``
    semantics). Every occurrence of that exact code text in the line is
    rewritten, not only the leading one.

    Args:
        line: One line of text, without its trailing newline.

    Returns:
        The rewritten line, or ``None`` when the line does not start with a
        code, in which case the caller skips the line.
    """
    found = CODE_PATTERN.match(line)
    if found is None:
        # Check for "no match" before touching the match object.
        return None
    code = found.group(0)
    return line.replace(code, code.replace(".", ""))


def strip_dots_in_file(src_path="84.txt", dst_path="84_result.txt"):
    """Rewrite *src_path* into *dst_path* with the dots stripped from codes.

    The input is read as bytes, decoded as UTF-8 and split on ``"\\n"``.
    Only lines that start with a code are written; all other lines are
    dropped. The output is written as UTF-8 so that it matches the encoding
    the input was decoded with, instead of depending on the locale default.

    Args:
        src_path: Path of the UTF-8 encoded input file.
        dst_path: Path of the result file (created or truncated).
    """
    # ``with`` guarantees both handles are closed even if decoding or
    # writing raises.
    with open(src_path, "rb") as src:
        lines = src.read().decode("utf-8").split("\n")

    with open(dst_path, "w", encoding="utf-8") as dst:
        for line in lines:
            cleaned = strip_code_dot(line)
            if cleaned is not None:
                dst.write(cleaned + "\n")


if __name__ == "__main__":
    strip_dots_in_file()
有如下数据,需要去掉 . 并在不足十位的编码末尾补零,补足十位
# -*- coding: utf-8 -*-
"""Strip the dot from the trailing code of each line and zero-pad it to ten.

Created on Tue Sep 26 10:46:12
@author: Don
"""
import re
import copy

# Codes shorter than this are right-padded with "0".
CODE_WIDTH = 10


def normalize_code_line(line):
    """Return *line* with its last whitespace-separated token normalised.

    The last token is treated as the code: its first ``"."`` (if any) is
    removed, and the result is right-padded with ``"0"`` up to
    ``CODE_WIDTH`` characters. Codes already that long or longer are not
    truncated.

    The token is replaced in place, so any whitespace after it (for example
    the ``"\\r"`` left over from a CRLF file that was split on ``"\\n"``) is
    kept where it was instead of shifting the cut point into the code.

    Args:
        line: One line of text, without its trailing newline.

    Returns:
        The rewritten line, or ``None`` for a blank / whitespace-only line,
        in which case the caller skips the line.
    """
    tokens = line.split()
    if not tokens:
        return None

    code = tokens[-1]
    # ``rstrip`` and ``split`` agree on what whitespace is, so the stripped
    # text is guaranteed to end with the last token.
    body = line.rstrip()
    code_start = len(body) - len(code)

    normalised = code.replace(".", "", 1).ljust(CODE_WIDTH, "0")
    return line[:code_start] + normalised + line[len(body):]


def normalize_index_file(src_path="index.txt", dst_path="index_result.txt"):
    """Rewrite *src_path* into *dst_path* with every trailing code normalised.

    The input is read as bytes, decoded as UTF-8 and split on ``"\\n"``.
    Blank lines are dropped. The output is written as UTF-8 so that it
    matches the encoding the input was decoded with, instead of depending on
    the locale default.

    Args:
        src_path: Path of the UTF-8 encoded input file.
        dst_path: Path of the result file (created or truncated).
    """
    # ``with`` guarantees both handles are closed even if decoding or
    # writing raises.
    with open(src_path, "rb") as src:
        lines = src.read().decode("utf-8").split("\n")

    with open(dst_path, "w", encoding="utf-8") as dst:
        for line in lines:
            fixed = normalize_code_line(line)
            if fixed is not None:
                dst.write(fixed + "\n")


if __name__ == "__main__":
    normalize_index_file()