|
@@ -449,11 +449,13 @@ def adjustSub_by_text_similarity(gts_in,gens_raw):
|
|
|
rep_ls = text_parser.replace_list(gts[i])
|
|
|
for reptxt in rep_ls:
|
|
|
gts[i] = gts[i].replace(reptxt,'')
|
|
|
+ print('GROUND TRUTH')
|
|
|
print(gts)
|
|
|
gens = []
|
|
|
for idx in range(int((len(gens_raw)+1)/4)):
|
|
|
gens.append(gens_raw[idx*4+2])
|
|
|
-
|
|
|
+ print('GENERATED')
|
|
|
+ print(gens)
|
|
|
combine2 = [''.join([i,j]) for i,j in zip(gts, gts[1:])]
|
|
|
combine3 = [''.join([i,j,k]) for i,j,k in zip(gts, gts[1:], gts[2:])]
|
|
|
alls = gts + combine2 + combine3
|
|
@@ -461,6 +463,7 @@ def adjustSub_by_text_similarity(gts_in,gens_raw):
|
|
|
adjusted = [None]*len(gens)
|
|
|
duplicated_list = []
|
|
|
for idx in range(len(gens)):
|
|
|
+ print('gen:',gens[idx])
|
|
|
match_text = difflib.get_close_matches(gens[idx], alls, cutoff=0.1)
|
|
|
if match_text[0] in duplicated_list:
|
|
|
for mt in match_text:
|