123456789101112131415161718192021222324252627282930 |
import re
# Sample passage used by the demo at the bottom of this script.
s = u"三宅一秀空間創藝|天使總監郁琇琇 室內設計師,擅長鄉村風、古典風、美式風、奢華風、混搭風以及北歐風,多年被評價為幸福空間觀眾最愛室內設計師之列。屢獲亞洲、韓國、英國、義大利等多國設計大獎。"
# One or more consecutive characters that are neither CJK unified ideographs
# (U+4E00-U+9FFF), ASCII digits, nor ASCII letters.  Whitespace falls in this
# class too.
_PUNCT_RUN = u'[^\u4e00-\u9fff0-9a-zA-Z]+'

# Alternative 1 (captured): digits, a run of such characters, digits.
# Alternative 2 (not captured): any other run of such characters.
# Compiled once at import time instead of on every call.
_TRIM_PATTERN = re.compile(u'([0-9]+{0}[0-9]+)|{0}'.format(_PUNCT_RUN))


def trim_punctuation(s):
    """Return *s* with punctuation, symbols and whitespace removed.

    Only CJK unified ideographs, ASCII digits and ASCII letters are kept.
    The one exception is a separator run sitting directly between two digit
    groups (for example ``3.14`` or ``1,000``): that whole span is captured
    by the first alternative of the pattern and put back unchanged.

    Args:
        s: The text to clean.

    Returns:
        The cleaned text; an empty string if nothing survives.
    """
    # group(1) is set only for the "digits-separator-digits" alternative;
    # for a plain separator run it is None, so the run is replaced by ''.
    return _TRIM_PATTERN.sub(lambda match: match.group(1) or u'', s)
# A run of characters other than ! ? , 。 . optionally followed by a single
# terminator (! ? 。 .).  Inside a character class '.', '!' and '?' are
# already literal, so no backslashes are needed (the escaped forms are
# invalid string escapes in a non-raw literal).
_SENTENCE_PATTERN = re.compile(u'[^!?,。.]+[!?。.]?', flags=re.U)


def splitter(s):
    """Yield the sentence-like pieces of *s*, one at a time.

    A piece is a maximal run of characters containing none of
    ``! ? , 。 .``.  If the run is immediately followed by ``!``, ``?``,
    ``。`` or ``.``, that terminator is kept as part of the piece.  A comma
    ends a piece but is itself dropped.

    Args:
        s: The text to split.

    Yields:
        Each piece in the order it occurs in *s*.
    """
    for match in _SENTENCE_PATTERN.finditer(s):
        yield match.group()
def split_by_pun(s):
    """Return the pieces produced by ``splitter(s)`` as a list.

    Args:
        s: The text to split.

    Returns:
        A list holding every piece yielded by ``splitter``, in order.
    """
    return list(splitter(s))
# Trimmed sentences longer than this many characters get a '*' appended.
maxLen = 10

# First pass: print the raw pieces exactly as split_by_pun returns them.
s_list = split_by_pun(s)
for sen in s_list:
    print(sen)
print('-------------------------------')

# Second pass: print each piece with punctuation stripped, marking the ones
# whose trimmed length exceeds maxLen.
trim_list = [trim_punctuation(sen) for sen in s_list]
for sen in trim_list:
    if len(sen) > maxLen:
        # Only the printed copy gets the marker; trim_list is not modified.
        sen += '*'
    print(sen)
|