gspace_fetch_ranks.py 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186
  1. from typing import NoReturn
  2. from selenium import webdriver
  3. import time
  4. import networkx as nx
  5. import dataset
  6. import pickle
  7. import codecs
  8. from selenium.webdriver.common import keys
  9. from selenium.webdriver.common.keys import Keys
  10. import sys
  11. import os
  12. import time
  13. import re
  14. import pandas as pd
  15. import df2sheet
  16. from browser_common import JBrowser
  17. import datetime
  18. import dataset
  # Shared `dataset` connection used by everything below in this script.
  # The URL can be supplied through the HHH_DATABASE_URL environment variable;
  # when that variable is unset or empty, the historical built-in URL is used,
  # so existing deployments keep working unchanged.
  # NOTE(review): the fallback URL embeds a password in source; consider
  # rotating it and relying on the environment variable only.
  db = dataset.connect(
      os.environ.get('HHH_DATABASE_URL')
      or 'mysql://choozmo:pAssw0rd@db.ptt.cx:3306/hhh?charset=utf8mb4'
  )
  # Alternate connections kept for reference:
  #db = dataset.connect('mysql://choozmo:pAssw0rd@139.162.121.30:33306/hhh?charset=utf8mb4')
  #table=db['monitor_kw_raw']
  #db = dataset.connect('sqlite:///c:/tmp/hhh.db')
  def find_master_by_designer(masters, designer):
      """Return the first entry of *masters* whose 'designer' equals *designer*.

      Args:
          masters: iterable of dict-like rows, each with a 'designer' key.
          designer: value to compare against each row's 'designer' field.

      Returns:
          The first matching row (the same object, not a copy), or ``None``
          when no row matches.
      """
      matching_rows = (row for row in masters if row['designer'] == designer)
      return next(matching_rows, None)
  def get_designer_statistics(designer_list):
      """Search Google for each designer and store where tracked sites rank.

      For every entry the designer name is searched on Google through a
      ``JBrowser`` session, the result entries matched by the XPath below are
      walked in page order, and the 1-based position of results mentioning
      '100.com', 'searchome' and 'hhh.com.tw' is recorded.  Results whose text
      contains 'google.com' are skipped and do not consume a position.

      Rows written (via the module-level ``db``):
        * ``rank_details``  - one row per counted result.
        * ``rank_summary``  - one row per 'hhh.com.tw' hit, or a single row
          with ``hhh == -1`` when the page had no such hit.
        * ``customer_list`` - the ``updated`` stamp of the designer is
          refreshed whenever a summary row is written.

      Args:
          designer_list: iterable of indexable pairs; item 0 is the designer
              name used as the search query, item 1 is the vip flag copied
              into the summary rows.

      Returns:
          dict with two lists: ``'masters'`` (the summary rows) and
          ``'details'`` (dicts with 'designer', 'title', 'url', 'ranking').

      NOTE(review): the listing this was recovered from had lost its
      indentation; the nesting below was reconstructed from the logic and
      should be confirmed against the original file.
      """
      # Function-scope imports keep this block self-contained.
      from urllib.parse import quote_plus
      from selenium.webdriver.common.by import By

      jb = JBrowser()
      jb.set_profile_path("Profile 7")

      details = []
      masters = []
      table = db['customer_list']
      table_details = db['rank_details']
      table_summary = db['rank_summary']

      for tup in designer_list:
          q = tup[0]
          vip = tup[1]

          # Encode the name so spaces, '&', '#', etc. cannot break the query.
          jb.get('https://www.google.com/search?q=' + quote_plus(q))
          driver = jb.get_driver()
          time.sleep(3)  # wait for the results page before querying the DOM

          # find_elements(By.XPATH, ...) works on both old and current Selenium;
          # the find_elements_by_xpath shortcut was removed in Selenium 4.3.
          elmts = driver.find_elements(By.XPATH, "//div[@class='TbwUpd NJjxre']/cite")

          idx = 1  # 1-based position among the counted (non-Google) results
          ranks = {'hhh': -1, 'searchome': -1, 'c_100': -1}  # -1 = not seen
          txt = None
          href = None
          for elmt in elmts:
              # Title is the <h3> under the cite's grandparent; "href" is that
              # grandparent's visible text (not an href attribute).
              txt = elmt.find_element(By.XPATH, "../..//h3").text
              href = elmt.find_element(By.XPATH, "../..").text
              print(idx)
              print(len(elmts))
              print(txt)
              print(href)

              if 'google.com' in href:
                  continue

              if '100.com' in href:
                  ranks['c_100'] = idx
              if 'searchome' in href:
                  ranks['searchome'] = idx
              if 'hhh.com.tw' in href:
                  ranks['hhh'] = idx
                  # NOTE(review): every hhh.com.tw hit writes its own summary
                  # row, so a page with several hits yields several rows.
                  _record_summary(table, table_summary, masters,
                                  q, txt, href, ranks, vip)

              d = {'designer': q, 'title': txt, 'url': href, 'ranking': idx}
              details.append(d)
              table_details.insert({
                  'q': q,
                  'dt': datetime.datetime.now(),
                  'designer': d['designer'],
                  'title': d['title'],
                  'url': d['url'],
                  'ranking': idx,
                  'hhh': ranks['hhh'],
                  'searchome': ranks['searchome'],
                  'c_100': ranks['c_100'],
              })
              idx += 1

          if ranks['hhh'] == -1:
              # No hhh.com.tw result on the page: still write one summary row
              # (title/url are those of the last result scanned, or None) so
              # the designer's `updated` stamp advances.
              _record_summary(table, table_summary, masters,
                              q, txt, href, ranks, vip)

          db.commit()
          time.sleep(3)  # pause between searches

      return {'masters': masters, 'details': details}


  def _record_summary(table, table_summary, masters, q, title, url, ranks, vip):
      """Refresh the customer's `updated` stamp and store one summary row."""
      stamp = {'name': q, 'updated': datetime.datetime.now()}
      print("updsert")
      print(stamp)
      table.upsert(stamp, keys=['name'])
      m = {
          'designer': q,
          'title': title,
          'url': url,
          'hhh': ranks['hhh'],
          'dt': datetime.datetime.now(),
          'searchome': ranks['searchome'],
          'c_100': ranks['c_100'],
          'vip': vip,  # always present so found / not-found rows share a schema
      }
      masters.append(m)
      table_summary.insert(m)
  114. # 寓子設計
  115. #qlist=['元均制作']
  116. #qlist=['三宅一秀']
  117. #qlist=['采品室內設計']
  118. #qlist=['寓子設計']
  119. #qlist=['綵韻室內設計','春雨時尚空間','阿曼空間設計','雅典設計','境庭國際設計']
  120. #qlist=['豐聚室內裝修','張馨室內設計','尚藝室內裝修','富億空間設計','比沙列室內裝修']
  121. #qlist=['森境王俊宏設計','格綸設計','齊舍設計','采舍空間設計','大琚空間設計']
  122. #qlist=['將作空間','昱承室內裝修','YHS DESIGN','德本迪室內設計','東風室內設計']
  123. #qlist=['陶璽空間設計','惹雅國際設計','浩室設計','藝谷空間設計','IS國際設計']
  124. #qlist=['摩登雅舍室內','星葉室內裝修','浩室設計','演拓空間','千綵胤空間']
  125. #qlist=['京璽國際','元典設計','朱英凱室內設計','亞維空間設計','馥築時尚設計']
  126. #qlist=['文儀室內裝修','寓子設計','恆岳空間設計','卓林室內設計','歐德傢俱']
  127. #qlist=['大久空間設計','成綺空間設計','知域設計','尚展空間設計','演繹動線空間']
  128. #qlist=['苡希創意設計','玖柞設計','維耕設計','昱森室內設計','上築空間設計']
  129. #qlist=['HATCH合砌設計','至文室內裝修','上陽設計','禾禾設計','聯寬室內裝修']
  130. #設計
  131. #qlist=['三宅一秀','萬寶隆空間設計','含仰空間設計','元均制作','承炫裝修']
  # Script entry: take up to 170 customers ordered by ascending `updated`
  # and run the ranking scrape for them as (name, vip) pairs.
  cursor = db.query('select name,vip from customer_list order by updated asc limit 170')
  # Alternate selections (vip-only / non-vip-only batches):
  #cursor=db.query('select name,vip from customer_list where vip = 0 order by updated asc limit 20')
  #cursor=db.query('select name,vip from customer_list where vip = 1 order by updated asc limit 20')
  qlist = [(row['name'], row['vip']) for row in cursor]
  # Single-entry smoke run:
  #get_designer_statistics([qlist[0]])
  get_designer_statistics(qlist)
  140. # if d['designer']==q:
  141. # df.loc[idx]=[d['designer'],d['title'],d['url'],d['ranking'],r['ranking'],dtstr]
  142. # idx+=1
  143. # df2sheet.save_sheet(df,'designer_ranking',q,startpos='A1')