12345678910111213141516171819202122232425 |
import re
from itertools import groupby
from operator import itemgetter

# Exploration script: locate the Chinese and non-Chinese character positions
# in a mixed-language string and report each contiguous non-Chinese run.


def is_cjk(ch):
    """Return True when *ch* is inside U+4E00..U+9FFF, both ends included.

    The bounds are inclusive so this predicate agrees with the regex class
    ``[\\u4e00-\\u9fff]`` used below; a strict comparison would misclassify
    U+4E00 and U+9FFF themselves.
    """
    return "\u4e00" <= ch <= "\u9fff"


ipath = "中文中文在這Windows on ARM不好用Eng at last"

# Contiguous Chinese segments as seen by the regex (kept instead of being
# computed and thrown away).
zh_segments = re.findall(r'[\u4e00-\u9fff]+', ipath)

# Character positions split by script: Chinese vs. everything else.
zh_idx = []
eng_idx = []
for i, ch in enumerate(ipath):
    if is_cjk(ch):
        zh_idx.append(i)
    else:
        eng_idx.append(i)

# Consecutive indices share the same (position - value) difference, so
# grouping on that difference yields one group per contiguous run.
for _key, run in groupby(enumerate(eng_idx), lambda ix: ix[0] - ix[1]):
    eng_range = list(map(itemgetter(1), run))
    # The input with this one non-Chinese run cut out; overwritten on every
    # iteration, so after the loop it reflects the last run only.
    ipath2 = ipath[:eng_range[0]] + ipath[eng_range[-1] + 1:]
    # NOTE(review): source indentation was lost; printing once per run is
    # assumed here - confirm it was not meant to run once after the loop.
    print(eng_range)
- def split_sentence():
-
|