# remote_gspace_fetch_ranks.py (6.2 KB)
  1. from typing import NoReturn
  2. from selenium import webdriver
  3. import time
  4. import networkx as nx
  5. import dataset
  6. import pickle
  7. import codecs
  8. from selenium.webdriver.common import keys
  9. from selenium.webdriver.common.keys import Keys
  10. from selenium import webdriver
  11. from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
  12. import time
  13. import os
  14. import urllib.parse
  15. from selenium.webdriver.support.ui import WebDriverWait
  16. from selenium.webdriver.common.by import By
  17. from selenium.webdriver.support import expected_conditions as EC
  18. import sys
  19. import os
  20. import time
  21. import re
  22. import pandas as pd
  23. import df2sheet
  24. from browser_common import JBrowser
  25. import datetime
  26. import dataset
  27. db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/hhh?charset=utf8mb4')
  28. def find_master_by_designer(masters,designer):
  29. for m in masters:
  30. if m['designer']==designer:
  31. return m
  32. def get_designer_statistics(designer_list):
  33. global db
  34. details=[]
  35. masters=[]
  36. table=db['customer_list']
  37. table_details=db['rank_details']
  38. table_summary=db['rank_summary']
  39. driver = webdriver.Remote(
  40. # command_executor='http://dev2.choozmo.com:34444/wd/hub',
  41. command_executor='http://127.0.0.1:4444/wd/hub',
  42. #command_executor='http://192.53.174.202:4444/wd/hub',
  43. #command_executor='http://dev2.choozmo.com:14444/wd/hub',
  44. desired_capabilities=DesiredCapabilities.CHROME)
  45. driver.set_window_size(1400,1000)
  46. for tup in designer_list:
  47. q=tup[0]
  48. vip=tup[1]
  49. googleurl='https://www.google.com/search?q='+q
  50. driver.get(googleurl)
  51. time.sleep(3)
  52. # elmts=driver.find_elements_by_xpath("//div[@class='g']//div[@class='yuRUbf']//a")
  53. elmts=driver.find_elements_by_xpath("//div[@class='TbwUpd NJjxre']/cite")
  54. # print(elmts)
  55. # time.sleep(9999)
  56. idx=1
  57. ranking=-1
  58. searchhome=-1
  59. hhh=-1
  60. com100=-1
  61. txt=None
  62. href=None
  63. for elmt in elmts:
  64. # href=elmt.get_attribute('href')
  65. elmt_titl3=elmt.find_element_by_xpath("../..//h3")
  66. elmt_href=elmt.find_element_by_xpath("../..")
  67. print(elmt_titl3.text)
  68. print(elmt_href.text)
  69. txt=elmt_titl3.text
  70. href=elmt_href.text
  71. print(idx)
  72. print(len(elmts))
  73. print(txt)
  74. print(href)
  75. # time.sleep(9999)
  76. # if len(txt)>2:
  77. if not ('google.com' in href):
  78. if '100.com' in href:
  79. com100=idx
  80. if 'searchome' in href:
  81. searchhome=idx
  82. if 'hhh.com.tw' in href:
  83. hhh=idx
  84. ranking=idx
  85. print("updsert")
  86. print({'name':q,'updated':datetime.datetime.now()})
  87. table.upsert({'name':q,'updated':datetime.datetime.now()},keys=['name'])
  88. m={'designer':q,'title':txt,'url':href,'hhh':hhh,'dt':datetime.datetime.now(),'searchome':searchhome,'c_100':com100,'vip':vip}
  89. masters.append(m)
  90. table_summary.insert(m)
  91. print(href)
  92. print(txt)
  93. # table.insert({'designer':q,'title':txt,'url':href,'ranking':idx})
  94. dtstr=datetime.datetime.now().strftime("%Y-%m/%d %H:%M:%S")
  95. d={'designer':q,'title':txt,'url':href,'ranking':idx}
  96. details.append(d)
  97. table_details.insert({'q':q,'dt':datetime.datetime.now(),'designer':d['designer'],'title':d['title'],'url':d['url'],'ranking':idx,'hhh':hhh,'searchome':searchhome,'c_100':com100})
  98. idx+=1
  99. # time.sleep(9999)
  100. if ranking==-1:
  101. ranking=idx
  102. print("updsert")
  103. print({'name':q,'updated':datetime.datetime.now()})
  104. table.upsert({'name':q,'updated':datetime.datetime.now()},keys=['name'])
  105. m={'designer':q,'title':txt,'url':href,'hhh':hhh,'dt':datetime.datetime.now(),'searchome':searchhome,'c_100':com100}
  106. masters.append(m)
  107. table_summary.insert(m)
  108. db.commit()
  109. time.sleep(3)
  110. # print(masters)
  111. # print(details)
  112. return {'masters':masters,'details':details}
  113. # 寓子設計
  114. #qlist=['元均制作']
  115. #qlist=['三宅一秀']
  116. #qlist=['采品室內設計']
  117. #qlist=['寓子設計']
  118. #qlist=['綵韻室內設計','春雨時尚空間','阿曼空間設計','雅典設計','境庭國際設計']
  119. #qlist=['豐聚室內裝修','張馨室內設計','尚藝室內裝修','富億空間設計','比沙列室內裝修']
  120. #qlist=['森境王俊宏設計','格綸設計','齊舍設計','采舍空間設計','大琚空間設計']
  121. #qlist=['將作空間','昱承室內裝修','YHS DESIGN','德本迪室內設計','東風室內設計']
  122. #qlist=['陶璽空間設計','惹雅國際設計','浩室設計','藝谷空間設計','IS國際設計']
  123. #qlist=['摩登雅舍室內','星葉室內裝修','浩室設計','演拓空間','千綵胤空間']
  124. #qlist=['京璽國際','元典設計','朱英凱室內設計','亞維空間設計','馥築時尚設計']
  125. #qlist=['文儀室內裝修','寓子設計','恆岳空間設計','卓林室內設計','歐德傢俱']
  126. #qlist=['大久空間設計','成綺空間設計','知域設計','尚展空間設計','演繹動線空間']
  127. #qlist=['苡希創意設計','玖柞設計','維耕設計','昱森室內設計','上築空間設計']
  128. #qlist=['HATCH合砌設計','至文室內裝修','上陽設計','禾禾設計','聯寬室內裝修']
  129. #設計
  130. #qlist=['三宅一秀','萬寶隆空間設計','含仰空間設計','元均制作','承炫裝修']
  131. qlist=[]
  132. cursor=db.query('select name,vip from customer_list order by updated asc limit 50')
  133. #cursor=db.query('select name,vip from customer_list where vip = 0 order by updated asc limit 20')
  134. #cursor=db.query('select name,vip from customer_list where vip = 1 order by updated asc limit 20')
  135. for c in cursor:
  136. qlist.append((c['name'],c['vip']))
  137. #get_designer_statistics([qlist[0]])
  138. get_designer_statistics(qlist)
  139. # if d['designer']==q:
  140. # df.loc[idx]=[d['designer'],d['title'],d['url'],d['ranking'],r['ranking'],dtstr]
  141. # idx+=1
  142. # df2sheet.save_sheet(df,'designer_ranking',q,startpos='A1')