import re,osimport win32com.clientfrom datetime import datetimefrom openpyxl import load_workbookfrom openpyxl.styles import PatternFillmdic={"Jan":"01","Feb":"02","Mar":"03","Apr":"04","May":"05","Jun":"06","Jul":"07","Aug":"08","Sep":"09","Oct":"10","Nov":"11","Dec":"12","January":"01","February":"02","March":"03","April":"04","May":"05","June":"06","July":"07","August":"08","September":"09","October":"10","November":"11","December":"12"}def norm_endate(instr): year = str(datetime.now().timetuple()[0]) matches=re.search('(\d{1,2})(-|/| )*(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]{0,6}(-|/| |, ?)*(\d{4}|\d{2})',instr,re.I) if matches: mg = matches.groups() d,m,y = mg[0],mg[2],mg[4] d = d if len(d)==2 else '0'+d m = m[0].upper()+m[1:].lower() m = mdic[m] if len(y)==2: y = '19'+y if int(y) > int(year[2:]) else '20'+y return '{}{}{}'.format(y,m,d) matches=re.search('(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]{0,6}(-|/| )*(\d{1,2})(-|/|, ?)(\d{4}|\d{2})',instr,re.I) if matches: mg = matches.groups() m,d,y = mg[0],mg[2],mg[4] d = d if len(d)==2 else '0'+d m = m[0].upper()+m[1:].lower() m = mdic[m] if len(y)==2: y = '19'+y if int(y) > int(year[2:]) else '20'+y return '{}{}{}'.format(y,m,d) matches=re.search('(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]{0,6}(-|/| )*(\d{4})',instr,re.I) if matches: mg = matches.groups() m,y = mg[0],mg[2] m = m[0].upper()+m[1:].lower() m = mdic[m] return '{}{}'.format(y,m) matches=re.search('(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]{0,6}(-|/| )*(\d{1,2})',instr,re.I) if matches: mg = matches.groups() m,d = mg[0],mg[2] d = d if len(d)==2 else '0'+d m = m[0].upper()+m[1:].lower() m = mdic[m] return '{}{}'.format(m,d) matches=re.search('(\d{1,2})(-|/| )(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]{0,6}',instr,re.I) if matches: mg = matches.groups() d,m = mg[0],mg[2] d = d if len(d)==2 else '0'+d m = m[0].upper()+m[1:].lower() m = mdic[m] return '{}{}'.format(m,d) matches=re.search('\d+',instr) if matches: return matches.group()def norm_cndate(instr): matches=re.search('(\d{4}) *年 *(\d{1,2}) *月 *(\d{1,2}) *日',instr) if matches: y,m,d = matches.groups() m = m if len(m)==2 else '0'+m d = d if len(d)==2 else '0'+d return '{}{}{}'.format(y,m,d) matches=re.search('(\d{4}) *年 *(\d{1,2}) *月',instr) if matches: y,m = matches.groups() m = m if len(m)==2 else '0'+m return '{}{}'.format(y,m) matches=re.search('(\d{1,2}) *月 *(\d{1,2}) *日',instr) if matches: m,d = matches.groups() m = m if len(m)==2 else '0'+m d = d if len(d)==2 else '0'+d return '{}{}'.format(m,d) matches=re.search('\d+',instr) if matches: return matches.group()def reconc(src,tgt,srcptn,tgtptn): slst=[] tlst=[] ptn1=re.compile(srcptn,re.I) ptn2=re.compile(tgtptn) for m in re.finditer(ptn1,src): dat=norm_endate(m.group()) slst.append(dat) for m in re.finditer(ptn2,tgt): dat=norm_cndate(m.group()) tlst.append(dat) slst.sort() tlst.sort() return slst==tlstdef xls2xlsx(path): excel = win32com.client.gencache.EnsureDispatch('Excel.Application') excel.DisplayAlerts = False wbxls = excel.Workbooks.Open(path) outpath = path+'x' wbxls.SaveAs(outpath,51) wbxls.Close() excel.DisplayAlerts = True excel.Application.Quit() return outpathdef date2txt(src): src='{}/{}/{}'.format(src.year,src.month,src.day) return datetime.strptime(src, "%Y/%m/%d").strftime("%Y%m%d")def color(wks,cell,clrindex): pattern = PatternFill(start_color=clrindex, end_color=clrindex, fill_type='solid') wks[cell].fill = patterndef chkxls(path): if path.split('.')[-1]=='xls': path=xls2xlsx(path) p1x=['(\d{1,2})(-|/| )*(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]{0,6}(-|/| |, ?)*(\d{4}|\d{2})', '(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]{0,6}(-|/| )*(\d{1,2})(-|/|, ?)(\d{4}|\d{2})', '(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]{0,6}(-|/| )*(\d{4})', '(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]{0,6}(-|/| )*(\d{1,2})', '(\d{1,2})(-|/| )(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]{0,6}', '\d+' ] p1='|'.join(['({})'.format(p) for p in p1x]) p2x=['(\d{4}) *年 *(\d{1,2}) *月 *(\d{1,2}) *日', '(\d{4}) *年 *(\d{1,2}) *月', '(\d{1,2}) *月 *(\d{1,2}) *日', '\d+' ] p2='|'.join(['({})'.format(p) for p in p2x]) wb = load_workbook(path) ws = wb.worksheets[0] lrow,lcol = ws.max_row,ws.max_column for r in range(1,lrow+1): src = ws.cell(r,3).value tgt = ws.cell(r,4).value if isinstance(src,datetime): src=date2txt(src) if isinstance(tgt,datetime): tgt=date2txt(tgt) out = reconc(str(src),str(tgt),p1,p2) if out == False: print("Err row: %s"%(r)) color(ws,'C'+str(r),'FFFFC000') color(ws,'D'+str(r),'FFFFC000') wb.save(path) wb.closecurdir=os.path.dirname(os.path.realpath(__file__))for f in os.listdir(curdir): if f.split('.')[-1] == 'xls': path = curdir+'\\'+f chkxls(path)