### What problem does this PR solve?

Always open text files for writing with an explicit UTF-8 encoding.

Close #932

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
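Without an explicit `encoding`, Python's `open()` falls back to `locale.getpreferredencoding(False)`, so the same code writes UTF-8 on most Linux hosts but a legacy codec such as cp1252 or gbk on Windows, and writing non-ASCII text then raises `UnicodeEncodeError`. A minimal sketch of the pattern applied throughout this PR (the file name and sample text below are illustrative, not taken from the diff):

```python
# Always state the encoding when writing text files; the platform default is
# not guaranteed to be UTF-8.
text = "清华大学 Tsinghua University"  # non-ASCII content, e.g. OCR output

# Before: the codec depends on the host locale and may fail on Windows.
# with open("ocr_output.txt", "w") as f:
#     f.write(text)

# After: deterministic UTF-8 output on every platform.
with open("ocr_output.txt", "w", encoding="utf-8") as f:
    f.write(text)
```

Running the interpreter with `PYTHONUTF8=1` (UTF-8 mode, Python 3.7+) would have a similar effect globally, but passing `encoding=` at each call site keeps the behavior explicit regardless of how the process is launched.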
```diff
 def rewrite_json_file(filepath, json_data):
-    with open(filepath, "w") as f:
+    with open(filepath, "w", encoding='utf-8') as f:
         json.dump(json_data, f, indent=4, separators=(",", ": "))
         f.close()
```
```diff
 # limitations under the License.
 #
-import os, json,re,copy
+import os
+import json
+import re
+import copy
 import pandas as pd

 current_file_path = os.path.dirname(os.path.abspath(__file__))
 TBL = pd.read_csv(os.path.join(current_file_path, "res/schools.csv"), sep="\t", header=0).fillna("")
```
```diff
 def loadRank(fnm):
     global TBL
     TBL["rank"] = 1000000
-    with open(fnm, "r",encoding='UTF-8') as f:
+    with open(fnm, "r", encoding='utf-8') as f:
         while True:
             l = f.readline()
             if not l:break
             nm,rk = l[0].strip(),int(l[1])
             #assert len(TBL[((TBL.name_cn == nm) | (TBL.name_en == nm))]),f"<{nm}>"
             TBL.loc[((TBL.name_cn == nm) | (TBL.name_en == nm)), "rank"] = rk
-        except Exception as e:
+        except Exception:
             pass
```
| "score": 1} for b, t in bxs if b[0][0] <= b[1][0] and b[0][1] <= b[-1][1]] | "score": 1} for b, t in bxs if b[0][0] <= b[1][0] and b[0][1] <= b[-1][1]] | ||||
| img = draw_box(images[i], bxs, ["ocr"], 1.) | img = draw_box(images[i], bxs, ["ocr"], 1.) | ||||
| img.save(outputs[i], quality=95) | img.save(outputs[i], quality=95) | ||||
| with open(outputs[i] + ".txt", "w+") as f: | |||||
| with open(outputs[i] + ".txt", "w+", encoding='utf-8') as f: | |||||
| f.write("\n".join([o["text"] for o in bxs])) | f.write("\n".join([o["text"] for o in bxs])) | ||||
| if args.mode.lower() == "tsr": | if args.mode.lower() == "tsr": | ||||
| #lyt = [t for t in lyt if t["type"] == "table column"] | #lyt = [t for t in lyt if t["type"] == "table column"] | ||||
| html = get_table_html(images[i], lyt, ocr) | html = get_table_html(images[i], lyt, ocr) | ||||
| with open(outputs[i] + ".html", "w+") as f: | |||||
| with open(outputs[i] + ".html", "w+", encoding='utf-8') as f: | |||||
| f.write(html) | f.write(html) | ||||
| lyt = [{ | lyt = [{ | ||||
| "type": t["label"], | "type": t["label"], |
```diff
             scores = sorted(scores, key=lambda kk: kk[1])
             for score in scores[:10]:
                 f.write('- text: ' + str(texts[score[0]]) + '\t qrel: ' + str(score[1]) + '\n')
-        json.dump(qrels, open(os.path.join(file_path, dataset + '.qrels.json'), "w+"), indent=2)
-        json.dump(run, open(os.path.join(file_path, dataset + '.run.json'), "w+"), indent=2)
+        json.dump(qrels, open(os.path.join(file_path, dataset + '.qrels.json'), "w+", encoding='utf-8'), indent=2)
+        json.dump(run, open(os.path.join(file_path, dataset + '.run.json'), "w+", encoding='utf-8'), indent=2)
         print(os.path.join(file_path, dataset + '_result.md'), 'Saved!')

     def __call__(self, dataset, file_path, miracl_corpus=''):
```