| 
					
				 | 
			
			
				@@ -5,14 +5,15 @@ import html2text 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import jieba 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import dataset 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 jieba.load_userdict("c:/tmp/userdict.txt") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-stopwords=', 的/-。*.|)(][_!、」「::jpgmenu有了也gif%stylespnghttpsimagesicogovRSSscript' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+stopwords=', 的/-。*.|)(][_!、」「::|)』『(xmlimgursvgbase64jpgmenuMenu有了也gif%stylespnghttpsimagesicogovRSSscript' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 db = dataset.connect('sqlite:///c:/tmp/jieba.db') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 db.query('delete from tmp') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 #db.query('drop table tmp') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-urls=['https://www.nightnight.tw/%E5%BA%8A%E5%A2%8A%E6%8E%A8%E8%96%A6/'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+#urls=['https://www.dcard.tw/f/house/p/232318765?cid=BBDFB720-BAE4-406E-8449-D2F12EA11241'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+urls=['https://www.gold-kirin.com.tw/about'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 #db = dataset.connect('sqlite:///:memory:') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 table=db['tmp'] 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -36,7 +37,7 @@ for url in urls: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             table.insert({'word':word}) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-cursor=db.query('select word,count(word) as cnt from tmp group by word having count(word) >2 order by count(word) desc') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+cursor=db.query('select word,count(word) as cnt from tmp group by word having ( count(word) >1 or length(word)>2) order by count(word) desc') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 for c in cursor: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     print(c['word']) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     print(c['cnt']) 
			 |