因为工作原因,经常需要把身份证信息录入到 Excel,
于是基于百度文字识别(OCR)功能,简单写了个身份证图片识别脚本。
有需要的可以参考一下。
import glob
import os
import re
import time

import openpyxl
from aip import AipOcr
# Baidu AI OCR credentials.
# They are read from environment variables so that secrets do not have to be
# written into the script; the empty-string fallback can be replaced with a
# literal value for a quick local run.
APPID = os.environ.get("BAIDU_OCR_APP_ID", "")
APIKEY = os.environ.get("BAIDU_OCR_API_KEY", "")
SECRETKEY = os.environ.get("BAIDU_OCR_SECRET_KEY", "")

# Shared OCR client used by get_text().
client = AipOcr(APPID, APIKEY, SECRETKEY)
def get_path(path):
    """Yield the image files found under ``./<path>``, searching recursively.

    Args:
        path: Folder name, relative to the current working directory.

    Returns:
        A lazy iterator of path strings for every regular file below the
        folder whose name ends in ``.jpg``, ``.jpeg`` or ``.png``
        (compared case-insensitively).
    """
    image_suffixes = (".jpg", ".jpeg", ".png")
    # os.path.join keeps the pattern portable; "**" together with
    # recursive=True descends into every sub-folder.
    pattern = os.path.join(".", path, "**", "*")
    return (
        candidate
        for candidate in glob.iglob(pattern, recursive=True)
        # A glob character class such as "[jpg,png]" matches single
        # characters, not extensions, so the suffix is checked explicitly.
        if candidate.lower().endswith(image_suffixes)
        and os.path.isfile(candidate)
    )
def open_file(filePath):
    """Read an image file and return its raw bytes.

    Args:
        filePath: Path of the file to read.

    Returns:
        The complete file content as ``bytes``.
    """
    # Binary mode: the content is passed on to the OCR client as-is.
    with open(filePath, 'rb') as fp:
        return fp.read()
def get_text(fileName):
    """Run OCR on one image and return all recognised text as one string.

    Args:
        fileName: Path of the image file to recognise.

    Returns:
        The ``'words'`` value of every entry in the response's
        ``'words_result'`` list, concatenated in order with no separator.
        An empty string is returned when the response has no
        ``'words_result'`` key.
    """
    image = open_file(fileName)
    dic_result = client.basicGeneral(image)
    # NOTE(review): the original carried a disabled one-second pause here,
    # presumably to stay under the service's request-rate limit; re-enable
    # it with time.sleep(1) if requests start being rejected.
    if 'words_result' not in dic_result:
        # Presumably an error response from the service. Report it and let
        # the caller skip this image instead of aborting the whole batch.
        print(f" {fileName}识别失败:{dic_result}")
        return ''
    return ''.join(str(entry['words']) for entry in dic_result['words_result'])
def parse_text(aaa):
    """Extract the name and ID number from each ID-card image.

    Args:
        aaa: Iterable of image file paths; each one is passed to
            ``get_text`` to obtain its recognised text.

    Returns:
        A list of dicts, one per successfully parsed image, with keys
        ``'xm'`` (the text between "名" and "性别") and ``'sf'`` (the run of
        digits / ``X`` / ``x`` following "号码"). Images whose text lacks the
        "公民身份号码" marker, or in which the name pattern is not found, are
        skipped.
    """
    records = []
    for image_path in aaa:
        text = get_text(image_path)
        # The label printed on the card is "公民身份号码"; text without it
        # is treated as not being an ID card.
        if '公民身份号码' not in text:
            continue
        name_match = re.search(r"名(.*?)性别", text)
        # Inside a character class "|" is a literal, so it is left out to
        # avoid accepting stray "|" characters as part of the number.
        id_match = re.search(r"号码([\dXx]*)", text)
        if name_match is None or id_match is None:
            continue
        records.append({'xm': name_match.group(1), 'sf': id_match.group(1)})
    return records
def write_excel(bbb, filename="./人员明细.xlsx"):
    """Write the parsed records to a new Excel workbook.

    Args:
        bbb: Iterable of dicts with keys ``'xm'`` and ``'sf'``.
        filename: Destination path of the workbook; defaults to the
            location the script has always used.

    The active sheet gets a header row ("姓名", "身份证号码") followed by one
    row per record, and a progress line is printed for each record.
    """
    wb = openpyxl.Workbook()
    sheet = wb.active
    sheet['A1'] = '姓名'
    sheet['B1'] = '身份证号码'
    # Data rows start at 2, directly below the header row.
    for row, record in enumerate(bbb, start=2):
        sheet.cell(row=row, column=1, value=record['xm'])
        sheet.cell(row=row, column=2, value=record['sf'])
        print(f" {record['xm']}已录入…")
    wb.save(filename)
def main(path='aaa'):
    """Recognise every ID-card image under a folder and export to Excel.

    Args:
        path: Folder (relative to the working directory) that holds the
            images; defaults to ``'aaa'``.
    """
    # Materialise the iterator: the count is needed for the time estimate
    # and the paths are iterated again while parsing.
    aaa = list(get_path(path))
    print('正在解析图片信息....')
    # The estimate assumes two seconds per image.
    print(f'大约需要{int(len(aaa)*2)}秒!')
    bbb = parse_text(aaa)
    write_excel(bbb)
    print('录入完毕!!!')
    # Presumably keeps the final message visible for a moment when the
    # script runs in a console window that closes on exit.
    time.sleep(2)
# Run the batch only when executed as a script, not when imported.
if __name__ == '__main__':
    main()