"""Record where tracked sites appear in Google results for each customer name.

For every (name, vip) pair taken from the ``customer_list`` table the script
loads the Google result page for that name through a remote Selenium session,
walks the result entries and stores per-result rows in ``rank_details`` and a
per-query summary row in ``rank_summary``.
"""

from typing import NoReturn
import codecs
import datetime
import os
import pickle
import re
import sys
import time
import urllib.parse

import dataset
import networkx as nx
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common import keys
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

import df2sheet
from browser_common import JBrowser

# NOTE(review): the connection string embeds a user name and password in
# source control; consider loading it from the environment instead.
db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/hhh?charset=utf8mb4')

# Selenium grid endpoint used for the remote Chrome session.
_GRID_URL = 'http://127.0.0.1:4444/wd/hub'

# XPath of the <cite> element Google renders inside each organic result.
_RESULT_CITE_XPATH = "//div[@class='TbwUpd NJjxre']/cite"


def find_master_by_designer(masters, designer):
    """Return the first dict in *masters* whose 'designer' equals *designer*.

    Returns None when no entry matches.
    """
    return next((m for m in masters if m['designer'] == designer), None)


def _record_summary(customers, summaries, masters, q, txt, href,
                    hhh, searchhome, com100, vip):
    """Touch the customer's 'updated' stamp and store one summary row.

    The summary dict is appended to *masters* and inserted into *summaries*.
    Both the "hhh.com.tw found" path and the "not found" fallback go through
    here so that every summary row carries the same set of keys.
    """
    now = datetime.datetime.now()
    touched = {'name': q, 'updated': now}
    print("updsert")
    print(touched)
    customers.upsert(touched, keys=['name'])
    summary = {'designer': q, 'title': txt, 'url': href, 'hhh': hhh, 'dt': now,
               'searchome': searchhome, 'c_100': com100, 'vip': vip}
    masters.append(summary)
    summaries.insert(summary)


def get_designer_statistics(designer_list):
    """Scrape Google rankings for each entry of *designer_list* and store them.

    Args:
        designer_list: iterable of sequences whose item 0 is the search term
            and item 1 is the vip value (passed through to the summary row
            unchanged).

    Returns:
        A dict ``{'masters': [...], 'details': [...]}`` where ``masters`` holds
        the summary dicts written to ``rank_summary`` and ``details`` holds one
        dict per non-Google result that was seen.

    Side effects:
        Opens a remote Chrome session (always closed on exit), writes to the
        ``customer_list``, ``rank_details`` and ``rank_summary`` tables and
        commits after every search term.
    """
    details = []
    masters = []
    table = db['customer_list']
    table_details = db['rank_details']
    table_summary = db['rank_summary']

    driver = webdriver.Remote(
        command_executor=_GRID_URL,
        desired_capabilities=DesiredCapabilities.CHROME)
    try:
        driver.set_window_size(1400, 1000)

        for tup in designer_list:
            q = tup[0]
            vip = tup[1]

            # Escape the term so that spaces, '&', '#', '+' and non-ASCII
            # characters cannot truncate or alter the query string.
            driver.get('https://www.google.com/search?q='
                       + urllib.parse.quote_plus(q))
            time.sleep(3)  # give the result page time to render

            elmts = driver.find_elements(By.XPATH, _RESULT_CITE_XPATH)

            idx = 1             # 1-based position among non-Google results
            searchhome = -1     # position of the first... last 'searchome' hit, -1 if none
            hhh = -1            # position of the last 'hhh.com.tw' hit, -1 if none
            com100 = -1         # position of the last '100.com' hit, -1 if none
            txt = None
            href = None
            summary_written = False

            for elmt in elmts:
                elmt_titl3 = elmt.find_element(By.XPATH, "../..//h3")
                elmt_href = elmt.find_element(By.XPATH, "../..")
                print(elmt_titl3.text)
                print(elmt_href.text)
                txt = elmt_titl3.text
                # Despite the name this is the visible text of the <cite>'s
                # grandparent element, not an href attribute.
                href = elmt_href.text
                print(idx)
                print(len(elmts))
                print(txt)
                print(href)

                if 'google.com' in href:
                    # Google's own entries are skipped and do not consume a
                    # ranking position.
                    continue

                if '100.com' in href:
                    com100 = idx
                if 'searchome' in href:
                    searchhome = idx
                if 'hhh.com.tw' in href:
                    hhh = idx
                    _record_summary(table, table_summary, masters, q, txt,
                                    href, hhh, searchhome, com100, vip)
                    summary_written = True

                print(href)
                print(txt)
                d = {'designer': q, 'title': txt, 'url': href, 'ranking': idx}
                details.append(d)
                table_details.insert({
                    'q': q, 'dt': datetime.datetime.now(),
                    'designer': d['designer'], 'title': d['title'],
                    'url': d['url'], 'ranking': idx, 'hhh': hhh,
                    'searchome': searchhome, 'c_100': com100})
                idx += 1

            if not summary_written:
                # No hhh.com.tw result: still store a summary (built from the
                # last result seen, or None values when there were none) and
                # refresh 'updated' so the customer rotates to the back of the
                # queue.
                _record_summary(table, table_summary, masters, q, txt, href,
                                hhh, searchhome, com100, vip)

            db.commit()
            time.sleep(3)  # pause between searches
    finally:
        # Always release the grid session, even when scraping raised.
        driver.quit()

    return {'masters': masters, 'details': details}


# Work queue: the 50 customers whose rankings were refreshed least recently.
# Alternative selections used in the past:
#   select name,vip from customer_list where vip = 0 order by updated asc limit 20
#   select name,vip from customer_list where vip = 1 order by updated asc limit 20
# For an ad-hoc run, replace qlist with explicit (name, vip) tuples.
cursor = db.query('select name,vip from customer_list order by updated asc limit 50')
qlist = [(c['name'], c['vip']) for c in cursor]

get_designer_statistics(qlist)