|
@@ -1,3 +1,4 @@
|
|
|
+from typing import NoReturn
|
|
|
from selenium import webdriver
|
|
|
import time
|
|
|
import networkx as nx
|
|
@@ -16,13 +17,24 @@ from browser_common import JBrowser
|
|
|
import datetime
|
|
|
import dataset
|
|
|
|
|
|
# Database handle shared by the rest of this script.
#
# The connection URL is taken from the DATABASE_URL environment variable when
# it is set, so credentials do not have to live in source control. The literal
# below is only a fallback that preserves the previous behaviour.
# NOTE(review): the fallback still embeds a password; rotate it and remove the
# default once every deployment sets DATABASE_URL.
import os

db = dataset.connect(
    os.environ.get(
        'DATABASE_URL',
        'mysql://choozmo:pAssw0rd@db.ptt.cx:3306/hhh?charset=utf8mb4',
    )
)

# Earlier endpoints, kept for reference:
#db = dataset.connect('mysql://choozmo:pAssw0rd@139.162.121.30:33306/hhh?charset=utf8mb4')
#table=db['monitor_kw_raw']

##db = dataset.connect('sqlite:///c:/tmp/hhh.db')
#db = dataset.connect('sqlite:///c:/tmp/hhh.db')
|
|
|
|
|
|
+
|
|
|
+
|
|
|
def find_master_by_designer(masters, designer):
    """Return the first entry of ``masters`` whose 'designer' value matches.

    ``masters`` is walked in order and every visited entry is indexed with
    the 'designer' key. The first entry equal to ``designer`` is returned
    unchanged; when no entry matches, the result is None.
    """
    matching = (entry for entry in masters if entry['designer'] == designer)
    return next(matching, None)
|
|
|
+
|
|
|
+
|
|
|
def get_designer_statistics(designer_list):
|
|
|
+ global db
|
|
|
jb=JBrowser()
|
|
|
jb.set_profile_path("Profile 7")
|
|
|
#q='萬寶隆空間設計團隊'
|
|
@@ -38,7 +50,14 @@ def get_designer_statistics(designer_list):
|
|
|
details=[]
|
|
|
masters=[]
|
|
|
|
|
|
- for q in qlist:
|
|
|
+
|
|
|
+ table=db['customer_list']
|
|
|
+ table_details=db['rank_details']
|
|
|
+ table_summary=db['rank_summary']
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ for q in designer_list:
|
|
|
|
|
|
googleurl='https://www.google.com/search?q='+q
|
|
|
|
|
@@ -48,35 +67,85 @@ def get_designer_statistics(designer_list):
|
|
|
time.sleep(3)
|
|
|
|
|
|
|
|
|
- elmts=driver.find_elements_by_xpath("//div[@class='g']//div[@class='yuRUbf']//a")
|
|
|
+# elmts=driver.find_elements_by_xpath("//div[@class='g']//div[@class='yuRUbf']//a")
|
|
|
+ elmts=driver.find_elements_by_xpath("//div[@class='TbwUpd NJjxre']/cite")
|
|
|
+# print(elmts)
|
|
|
+# time.sleep(9999)
|
|
|
+
|
|
|
+
|
|
|
idx=1
|
|
|
ranking=-1
|
|
|
+ searchhome=-1
|
|
|
+ hhh=-1
|
|
|
+ com100=-1
|
|
|
+ txt=None
|
|
|
+ href=None
|
|
|
for elmt in elmts:
|
|
|
- href=elmt.get_attribute('href')
|
|
|
- txt=elmt.text
|
|
|
- if len(txt)>10:
|
|
|
+
|
|
|
+
|
|
|
+# href=elmt.get_attribute('href')
|
|
|
+ elmt_titl3=elmt.find_element_by_xpath("../..//h3")
|
|
|
+ elmt_href=elmt.find_element_by_xpath("../..")
|
|
|
+
|
|
|
+ print(elmt_titl3.text)
|
|
|
+ print(elmt_href.text)
|
|
|
+ txt=elmt_titl3.text
|
|
|
+ href=elmt_href.text
|
|
|
+
|
|
|
+
|
|
|
+ print(idx)
|
|
|
+ print(len(elmts))
|
|
|
+ print(txt)
|
|
|
+ print(href)
|
|
|
+# time.sleep(9999)
|
|
|
+# if len(txt)>2:
|
|
|
+ if not ('google.com' in href):
|
|
|
+
|
|
|
+ if '100.com' in href:
|
|
|
+ com100=idx
|
|
|
+
|
|
|
+ if 'searchome' in href:
|
|
|
+ searchhome=idx
|
|
|
+
|
|
|
if 'hhh.com.tw' in href:
|
|
|
+ hhh=idx
|
|
|
ranking=idx
|
|
|
- # h_table.insert({'designer':q,'title':txt,'url':href,'ranking':idx})
|
|
|
- masters.append({'designer':q,'title':txt,'url':href,'ranking':idx})
|
|
|
+ print("updsert")
|
|
|
+ print({'name':q,'updated':datetime.datetime.now()})
|
|
|
+ table.upsert({'name':q,'updated':datetime.datetime.now()},keys=['name'])
|
|
|
+ m={'designer':q,'title':txt,'url':href,'hhh':hhh,'dt':datetime.datetime.now(),'searchome':searchhome,'c_100':com100}
|
|
|
+ masters.append(m)
|
|
|
+ table_summary.insert(m)
|
|
|
|
|
|
print(href)
|
|
|
print(txt)
|
|
|
# table.insert({'designer':q,'title':txt,'url':href,'ranking':idx})
|
|
|
- details.append({'designer':q,'title':txt,'url':href,'ranking':idx})
|
|
|
+ dtstr=datetime.datetime.now().strftime("%Y-%m/%d %H:%M:%S")
|
|
|
+ d={'designer':q,'title':txt,'url':href,'ranking':idx}
|
|
|
+ details.append(d)
|
|
|
+ table_details.insert({'q':q,'dt':datetime.datetime.now(),'designer':d['designer'],'title':d['title'],'url':d['url'],'ranking':idx,'hhh':hhh,'searchome':searchhome,'c_100':com100})
|
|
|
|
|
|
idx+=1
|
|
|
|
|
|
+# time.sleep(9999)
|
|
|
+
|
|
|
+ if ranking==-1:
|
|
|
+ ranking=idx
|
|
|
+ print("updsert")
|
|
|
+ print({'name':q,'updated':datetime.datetime.now()})
|
|
|
+ table.upsert({'name':q,'updated':datetime.datetime.now()},keys=['name'])
|
|
|
+ m={'designer':q,'title':txt,'url':href,'hhh':hhh,'dt':datetime.datetime.now(),'searchome':searchhome,'c_100':com100}
|
|
|
+ masters.append(m)
|
|
|
+ table_summary.insert(m)
|
|
|
+
|
|
|
+ db.commit()
|
|
|
+
|
|
|
time.sleep(3)
|
|
|
|
|
|
# print(masters)
|
|
|
# print(details)
|
|
|
return {'masters':masters,'details':details}
|
|
|
|
|
|
-def find_master_by_designer(masters,designer):
|
|
|
- for m in masters:
|
|
|
- if m['designer']==designer:
|
|
|
- return m
|
|
|
|
|
|
# 寓子設計
|
|
|
#qlist=['元均制作']
|
|
@@ -100,26 +169,14 @@ def find_master_by_designer(masters,designer):
|
|
|
|
|
|
|
|
|
# Collect the names of the 20 customer_list rows with the oldest `updated`
# value and run the ranking scrape for them.
cursor = db.query('select name from customer_list order by updated asc limit 20')
qlist = [row['name'] for row in cursor]

# For a quick single-name run, pass [qlist[0]] instead of the full list.
get_designer_statistics(qlist)
|
|
|
|