# Recheck false postive nummeric errors (mostly dates) in the QA report from xbench.# Convert the following forms of date into standard form 'yyyymmdd' using norm_endate and norm_endate functions# dateformats ='''# 02 Aug, 2020# 02 Aug, 20# 02 Aug# 02/Dec/2020# 02/Aug/02# 02/Aug# 02-January-2020# 02-Aug-20# 02-Aug# Aug 2, 2020# Aug 12, 20# Aug 12# Aug/22/2020# Aug/02/82# Aug/02# January-02-2020# January-02-20# January-02# Jan-02-2020# '''import xlrd,re,osfrom datetime import datetimemdic={"Jan":"01","Feb":"02","Mar":"03","Apr":"04","May":"05","Jun":"06","Jul":"07","Aug":"08","Sep":"09","Oct":"10","Nov":"11","Dec":"12","January":"01","February":"02","March":"03","April":"04","May":"05","June":"06","July":"07","August":"08","September":"09","October":"10","November":"11","December":"12"}def norm_endate(instr): matches=re.search('((\d{1,2})(-|/| )*([A-Z]{1}[a-z]{2,9})(-|/| |,)*(\d{0,4}))|(([A-Z]{1}[a-z]{2,9})(-|/| )*(\d{1,2})(-|/| |,)*(\d{0,4}))',instr) mg = matches.groups() discdat=[x for x in mg if x][1:] if len(discdat) == 5: d,m,y= discdat[0],discdat[2],discdat[4] if len(d)>=3: temp = d d = m m = temp if len(y)==2: year = str(datetime.now().timetuple()[0]) if int(y) > int(year[2:]): y = '19'+y else: y = '20'+y else: d,m= discdat[0],discdat[1] if len(d)>=3: temp = d d = m m = temp y = '2020' res='{}{}{}'.format(y,mdic[m],d if len(d)==2 else "0"+d) return resdef norm_cndate(instr): try: matches=re.search('(\d{4}) *年 *(\d{1,2}) *月 *(\d{1,2}) *日',instr) y,m,d = matches.groups() m = m if len(m)==2 else '0'+m d = d if len(d)==2 else '0'+d except AttributeError: matches=re.search(' *(\d{1,2}) *月 *(\d{1,2}) *日',instr) m,d = matches.groups() y = '2020' m = m if len(m)==2 else '0'+m d = d if len(d)==2 else '0'+d return '{}{}{}'.format(y,m,d)def reconc(src,tgt,srcptn,tgtptn): slst=[] tlst=[] ptn1=re.compile(srcptn,flags=re.I|re.M) ptn2=re.compile(tgtptn,flags=re.I|re.M) smatch = len([m.group() for m in re.finditer(ptn1,src) if m]) tmatch = len([m.group() for m in re.finditer(ptn2,tgt) if m]) if smatch ==tmatch: for m in re.finditer(ptn1,src): dat=norm_endate(m.group()) slst.append(dat) for m in re.finditer(ptn2,tgt): dat=norm_cndate(m.group()) tlst.append(dat) slst.sort() tlst.sort() return slst==tlstdef chkxls(path): p1="((\d{1,2})(-|/| )*([A-Z]{1}[a-z]{2,9})(-|/| |,)*(\d{2,4}))|(([A-Z]{1}[a-z]{2,9})(-|/| )*(\d{1,2})(-|/| |,)*(\d{2,4}))" p2="(\d{4}) *年 *(\d{1,2}) *月 *(\d{1,2}) *日" wb = xlrd.open_workbook(path) ws = wb.sheet_by_name(wb.sheet_names()[0]) lrow,lcol = ws.nrows,ws.ncols for r in range(0,lrow): src = ws.cell_value(r,2) tgt = ws.cell_value(r,3) out = reconc(str(src),str(tgt),p1,p2) if out == False: print("Err row: %s"%(r+1))mp=input("Please enter directory to target Excel file:")for f in os.listdir(mp): if f.split('.')[-1].startswith('xls'): path = mp+'\\'+fchkxls(path)