import re from itertools import groupby from operator import itemgetter ipath= "中文中文在這Windows on ARM不好用Eng at last" re.findall(r'[\u4e00-\u9fff]+', ipath) zh_idx = [] eng_idx= [] for i in range(len(ipath)): if ipath[i] > u'\u4e00' and ipath[i] < u'\u9fff': zh_idx.append(i) else: eng_idx.append(i) for k, g in groupby(enumerate(eng_idx), lambda ix : ix[0] - ix[1]): eng_range = list(map(itemgetter(1), g)) ipath2 = ipath[0 : eng_range[0] : ] + ipath[eng_range[-1]+1 : :] print(eng_range) def split_sentence():