Xbench: makes QA better and faster

2021-02-20 我的時光穿梭機
# Recheck false postive nummeric errors (mostly dates) in the QA report from xbench.# Convert the following forms of date into standard form 'yyyymmdd' using norm_endate and norm_endate functions# dateformats ='''# 02 Aug, 2020# 02 Aug, 20# 02 Aug# 02/Dec/2020# 02/Aug/02# 02/Aug# 02-January-2020# 02-Aug-20# 02-Aug# Aug 2, 2020# Aug 12, 20# Aug 12# Aug/22/2020# Aug/02/82# Aug/02# January-02-2020# January-02-20# January-02# Jan-02-2020# '''import xlrd,re,osfrom datetime import datetimemdic={"Jan":"01","Feb":"02","Mar":"03","Apr":"04","May":"05","Jun":"06","Jul":"07","Aug":"08","Sep":"09","Oct":"10","Nov":"11","Dec":"12","January":"01","February":"02","March":"03","April":"04","May":"05","June":"06","July":"07","August":"08","September":"09","October":"10","November":"11","December":"12"}def norm_endate(instr):    matches=re.search('((\d{1,2})(-|/| )*([A-Z]{1}[a-z]{2,9})(-|/| |,)*(\d{0,4}))|(([A-Z]{1}[a-z]{2,9})(-|/| )*(\d{1,2})(-|/| |,)*(\d{0,4}))',instr)    mg = matches.groups()    discdat=[x for x in mg if x][1:]    if len(discdat) == 5:        d,m,y= discdat[0],discdat[2],discdat[4]        if len(d)>=3:            temp = d            d = m            m = temp        if len(y)==2:            year = str(datetime.now().timetuple()[0])            if int(y) > int(year[2:]):                y = '19'+y            else:                y = '20'+y    else:        d,m= discdat[0],discdat[1]        if len(d)>=3:            temp = d            d = m            m = temp        y = '2020'    res='{}{}{}'.format(y,mdic[m],d if len(d)==2 else "0"+d)    return resdef norm_cndate(instr):    try:        matches=re.search('(\d{4}) *年 *(\d{1,2}) *月 *(\d{1,2}) *日',instr)        y,m,d = matches.groups()        m = m if len(m)==2 else '0'+m        d = d if len(d)==2 else '0'+d    except AttributeError:        matches=re.search(' *(\d{1,2}) *月 *(\d{1,2}) *日',instr)        m,d = matches.groups()        y = '2020'        m = m if len(m)==2 else '0'+m        d = d if len(d)==2 else '0'+d    return '{}{}{}'.format(y,m,d)def reconc(src,tgt,srcptn,tgtptn):    slst=[]    tlst=[]    ptn1=re.compile(srcptn,flags=re.I|re.M)    ptn2=re.compile(tgtptn,flags=re.I|re.M)    smatch = len([m.group() for m in re.finditer(ptn1,src) if m])    tmatch = len([m.group() for m in re.finditer(ptn2,tgt) if m])    if smatch ==tmatch:        for m in re.finditer(ptn1,src):            dat=norm_endate(m.group())            slst.append(dat)        for m in re.finditer(ptn2,tgt):            dat=norm_cndate(m.group())            tlst.append(dat)    slst.sort()    tlst.sort()    return slst==tlstdef chkxls(path):    p1="((\d{1,2})(-|/| )*([A-Z]{1}[a-z]{2,9})(-|/| |,)*(\d{2,4}))|(([A-Z]{1}[a-z]{2,9})(-|/| )*(\d{1,2})(-|/| |,)*(\d{2,4}))"    p2="(\d{4}) *年 *(\d{1,2}) *月 *(\d{1,2}) *日"    wb = xlrd.open_workbook(path)    ws = wb.sheet_by_name(wb.sheet_names()[0])    lrow,lcol = ws.nrows,ws.ncols    for r in range(0,lrow):        src = ws.cell_value(r,2)        tgt = ws.cell_value(r,3)        out = reconc(str(src),str(tgt),p1,p2)        if out == False:            print("Err row: %s"%(r+1))mp=input("Please enter directory to target Excel file:")for f in os.listdir(mp):    if f.split('.')[-1].startswith('xls'):        path = mp+'\\'+fchkxls(path)

相關焦點