Jared il y a 3 ans
Parent
commit
a7a5105ee3
1 fichier modifié avec 34 ajouts et 16 suppressions
  1. 34 16
      choozmo/gsearch_selenium.py

+ 34 - 16
choozmo/gsearch_selenium.py

@@ -41,13 +41,15 @@ curdir=os.path.realpath('.')
 
 #fr=codecs.open(curdir+os.sep+'contentgap.txt','r','utf-8')
 #fr=codecs.open(curdir+os.sep+'hhh\\seo\\contentgap.txt','r','utf-8')
-fr=codecs.open('C:\\gitlab\\kw_tools\\kw_tools\\hhh\\SEO\\contentgap.txt','r','utf-8')
-lines=fr.readlines()
+#fr=codecs.open('C:\\gitlab\\kw_tools\\kw_tools\\hhh\\SEO\\contentgap.txt','r','utf-8')
+#lines=fr.readlines()
 lst=[]
-for l in lines:
-    lst.append(l.replace('\n',''))
-
-
+#for l in lines:
+#    lst.append(l.replace('\n',''))
+#
+cursor=db.query('select term from hhh.contentgap_terms where term not in (SELECT kw FROM hhh.hhh_contentgap_serp where datediff(now(),dt) =0 and ranking is not null )')
+for c in cursor:
+    lst.append(c['term'])
 
 
 
@@ -116,25 +118,34 @@ result=[]
 driver=None
 
 def restart_browser():
-    client = docker.from_env()
-    ls=client.containers.list()
-    print(ls)
-    ls[0].restart()
+    os.system('docker container restart p4444')
+
+#    client = docker.from_env()
+#    ls=client.containers.list()
+#    print(ls)
+#    ls[0].restart()
     time.sleep(10)
 
-#    options = webdriver.ChromeOptions()
+    options = webdriver.ChromeOptions()
+#    options.add_argument("--proxy-server=socks5://130.61.93.198:1080")
+
     #driver=webdriver.Chrome(desired_capabilities=options.to_capabilities())
     driver = webdriver.Remote(
         command_executor='http://127.0.0.1:4444/wd/hub',
 #        command_executor='http://172.104.93.163:4444/wd/hub', 
     #command_executor='http://dev2.choozmo.com:14444/wd/hub',
-#    desired_capabilities=options.to_capabilities())
-    desired_capabilities=DesiredCapabilities.CHROME)
+    desired_capabilities=options.to_capabilities())
+#    desired_capabilities=DesiredCapabilities.CHROME)
     driver.set_window_size(1400,1000)
     return driver
 
+
 for l in lst:
-#for l in lst[2:]:
+#for l in lst[21:]:
+
+#for l in lst[32:]:
+#for l in lst[42:]:
+
     if True:
 #    if kwlst.get(l) is None:
         driver=restart_browser()
@@ -142,9 +153,16 @@ for l in lst:
     #    l='房間 油漆'
     #    idx=process_query(,number_results=100,language_code='zh-TW',pat='hhh.com.tw')
         idx=process_query(l,number_results=100,language_code='zh-TW',pat='hhh.com.tw')
-
-        table.insert({'kw':l,'ranking':idx,'dt':datetime.datetime.now()})
+#        if idx is None:
+#            sys.exit()
         print({'kw':l,'ranking':idx})
+        if idx==None:
+            print(driver.page_source)
+            if '我們的系統偵測到您的電腦網路送出的流量有異常情況' in driver.page_source:
+                print('baned.....')
+                sys.exit()
+        table.insert({'kw':l,'ranking':idx,'dt':datetime.datetime.now()})
+
         db.commit()
     #    time.sleep(9999)