# RoadToPPDE / OCRandQPTandIS

 
			
import csv
import os
import time

from paddleocr import PaddleOCR

# NOTE(review): presumably a workaround for the "duplicate OpenMP runtime"
# abort that can occur when several native libraries each bundle their own
# copy of the OpenMP runtime -- confirm it is still required.
# The variable is set at import time, after the imports above have run.
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"


# dirpath is the path of the folder that holds the images (e.g. a folder named `test`); this function collects the path of every file in it.
def manyImages(dirpath):
    """Collect the path of every file found under ``dirpath``.

    The tree is walked recursively with ``os.walk``. Every file is included,
    whatever its extension. As a side effect the collected paths are written,
    one per line, to ``all_list.txt`` in the current working directory
    (encoded as ``utf-8-sig``).

    Args:
        dirpath: Root folder to scan.

    Returns:
        A ``(paths, count)`` tuple: the list of file paths in walk order and
        the number of files found. A missing or empty folder yields
        ``([], 0)``.
    """
    all_list_path = []
    for root, _dirs, files in os.walk(dirpath):
        all_list_path.extend(os.path.join(root, name) for name in files)

    # A context manager guarantees the listing is flushed and the handle is
    # closed before returning; the handle was previously left open.
    with open('all_list.txt', 'w', encoding='utf-8-sig') as listing:
        listing.writelines(path + "\n" for path in all_list_path)

    return all_list_path, len(all_list_path)


# Placeholder written for any field that could not be read reliably.
_UNKNOWN = '未知'
# A recognised line is only trusted when its score is strictly above this.
_MIN_CONFIDENCE = 0.8
# Columns: name, scan location, entry registration time, latest report time,
# result, code status, vaccination info.
_CSV_HEADER = ['姓名', '扫码地点', '进入登记时间', '最新上报时间', '结果', '码状态', '疫苗接种信息']


def _split_field(text, separator):
    """Return ``text.split(separator)[1]``, or ``None`` if there is no such piece."""
    pieces = text.split(separator)
    return pieces[1] if len(pieces) > 1 else None


def _format_register_time(text):
    """Extract the registration time and insert a space between date and time.

    The value after the full-width colon is split on ``-``; the first two
    characters of the third piece are kept with the date and the remainder
    is appended after a space. Returns ``None`` when the text does not have
    that shape.
    """
    value = _split_field(text, "：")
    if value is None:
        return None
    parts = value.split("-")
    if len(parts) < 3:
        return None
    return parts[0] + "-" + parts[1] + "-" + parts[2][:2] + " " + parts[2][2:]


def _store(row, slot, trusted, value):
    """Write ``value`` into ``row[slot]``, or the placeholder if untrusted.

    A trusted line whose value could not be parsed (``None``) leaves the
    slot untouched instead of raising.
    """
    if not trusted:
        row[slot] = _UNKNOWN
    elif value is not None:
        row[slot] = value


def _parse_record(results):
    """Build one CSV row from the OCR lines of a single image.

    NOTE(review): assumes every entry looks like ``[box, (text, score)]``,
    i.e. ``entry[1][0]`` is the text and ``entry[1][1]`` the confidence.
    Some PaddleOCR releases wrap the lines in an extra per-image list --
    confirm against the installed version.
    """
    row = [_UNKNOWN] * len(_CSV_HEADER)
    lines = results or []  # tolerate an engine that returns None for "no text"
    for index, entry in enumerate(lines):
        text, score = entry[1][0], entry[1][1]
        trusted = score > _MIN_CONFIDENCE

        if "*" in text:
            # The masked name; the scan location is taken from the line that
            # follows it, whatever that line's own score is.
            _store(row, 0, trusted, text)
            if trusted and index + 1 < len(lines):
                row[1] = lines[index + 1][1][0]
        if "登记时间" in text:
            _store(row, 2, trusted, _format_register_time(text))
        if "上报时间" in text:
            _store(row, 3, trusted, _split_field(text, "："))
        if "阴性" in text or "阳性" in text:
            _store(row, 4, trusted, text[-2:])
        if "码" in text:
            _store(row, 5, trusted, text)
        if "完成" in text:
            _store(row, 6, trusted, _split_field(text, "您"))
    return row


def ocr(dirpath, savepath):
    """Run OCR on every file under ``dirpath`` and write one CSV row per file.

    The output file is ``result_<YYYY-mm-dd-HH-MM>.csv`` inside ``savepath``
    (created if missing). It starts with a header row; each following row
    holds the fields extracted from one image, with ``未知`` for anything
    that was not found or was recognised with a score of 0.8 or lower.

    Args:
        dirpath: Folder scanned (recursively) by ``manyImages``.
        savepath: Folder that receives the CSV file. A trailing path
            separator is no longer required.
    """
    all_img, length = manyImages(dirpath)
    print("all_img:", all_img)
    print("一共需要处理%d张图片" % length)
    # Named ``engine`` so the local does not shadow this function's own name.
    engine = PaddleOCR(use_angle_cls=True, lang="ch")
    print("开始识别并写入csv文件：")

    # Minute-resolution timestamp used in the output file name.
    time_stamp = time.strftime('%Y-%m-%d-%H-%M')

    if savepath:
        os.makedirs(savepath, exist_ok=True)
    final_csv_path = os.path.join(savepath, "result_" + time_stamp + ".csv")

    # newline='' is what the csv module requires; without it the header row
    # ended in "\r\r\n" on Windows.
    with open(final_csv_path, 'w', encoding='utf-8-sig', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(_CSV_HEADER)
        for img in all_img:
            writer.writerow(_parse_record(engine.ocr(img, cls=True)))
            # Keep finished rows on disk even if a later image fails.
            f.flush()
    print("写入csv文件完成")


# Script entry point: process the `images` folder and write the CSV into `csv/`.
if __name__ == "__main__":
    ocr(dirpath='images', savepath='csv/')