|
@@ -463,7 +463,6 @@ def adjustSub_by_text_similarity(gts_in,gens_raw):
|
|
adjusted = [None]*len(gens)
|
|
adjusted = [None]*len(gens)
|
|
duplicated_list = []
|
|
duplicated_list = []
|
|
for idx in range(len(gens)):
|
|
for idx in range(len(gens)):
|
|
- print('gen:',gens[idx])
|
|
|
|
match_text = difflib.get_close_matches(gens[idx], alls, cutoff=0.1)
|
|
match_text = difflib.get_close_matches(gens[idx], alls, cutoff=0.1)
|
|
if match_text[0] in duplicated_list:
|
|
if match_text[0] in duplicated_list:
|
|
for mt in match_text:
|
|
for mt in match_text:
|
|
@@ -473,10 +472,16 @@ def adjustSub_by_text_similarity(gts_in,gens_raw):
|
|
else:
|
|
else:
|
|
adjusted[idx] = match_text[0]
|
|
adjusted[idx] = match_text[0]
|
|
duplicated_list.append(match_text[0])
|
|
duplicated_list.append(match_text[0])
|
|
|
|
+ print('ADJUESTED')
|
|
|
|
+ print(adjusted)
|
|
combine2_tag = [''.join([i,j]) for i,j in zip(gts_in, gts_in[1:])]
|
|
combine2_tag = [''.join([i,j]) for i,j in zip(gts_in, gts_in[1:])]
|
|
combine3_tag = [''.join([i,j,k]) for i,j,k in zip(gts_in, gts_in[1:], gts_in[2:])]
|
|
combine3_tag = [''.join([i,j,k]) for i,j,k in zip(gts_in, gts_in[1:], gts_in[2:])]
|
|
alls_tag = gts_in + combine2_tag + combine3_tag
|
|
alls_tag = gts_in + combine2_tag + combine3_tag
|
|
|
|
+ print('ALLS_TAG')
|
|
|
|
+ print(alls_tag)
|
|
for idx in range(len(adjusted)):
|
|
for idx in range(len(adjusted)):
|
|
|
|
+ print('adj:',adjusted[idx])
|
|
|
|
+
|
|
match_text = difflib.get_close_matches(adjusted[idx], alls_tag, cutoff=0.1)
|
|
match_text = difflib.get_close_matches(adjusted[idx], alls_tag, cutoff=0.1)
|
|
adjusted[idx] = match_text[0]
|
|
adjusted[idx] = match_text[0]
|
|
return adjusted
|
|
return adjusted
|