zooeytsai 2 年 前
コミット
89ba489dbe
3 ファイル変更148 行追加3 行削除
  1. 61 0
      web/clickbot_100.py
  2. 73 3
      web/main.py
  3. 14 0
      web/static/ranking.html

+ 61 - 0
web/clickbot_100.py

@@ -0,0 +1,61 @@
+
+import dataset
+from selenium import webdriver
+import traceback
+import time
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.common.by import By
+import pymysql
+pymysql.install_as_MySQLdb()
+
+
+path = 'C:\portable\chromedriver'
+path_z = '/Users/zooeytsai/Downloads/chromedriver 4'
+def restart_browser():
+    options = webdriver.ChromeOptions()
+    # options.add_argument("user-agent=%s" % rua())
+    options.add_argument('--headless')
+    driver=webdriver.Chrome(options=options,executable_path=path_z)
+    driver.set_window_size(950, 20000)
+    return driver
+
+
+def process_one(kw,domain):
+    # db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+    db_local = dataset.connect('mysql://root:jondae350@localhost/ig_tags')
+    table=db_local['google_rank']
+
+    driver=restart_browser()
+    # escaped_search_term=urllib.parse.quote(term)
+    googleurl = 'https://www.google.com/?num=100'
+    driver.get(googleurl)
+    time.sleep(6)
+    send_kw_elmt = driver.find_element(By.XPATH,'/html/body/div[1]/div[3]/form/div[1]/div[1]/div[1]/div/div[2]/input')
+    send_kw_elmt.send_keys(kw)
+    time.sleep(3)
+    send_kw_elmt.send_keys(Keys.ENTER)
+    time.sleep(6)
+    elmts=driver.find_elements_by_xpath("//div[@class='yuRUbf']/a")
+
+    cnt=1
+    datadict={'搜尋詞':[],'結果標題':[],'結果網址':[],'結果名次':[]}
+    
+    for elmt in elmts:
+        try:
+            href=elmt.get_attribute('href')
+            if domain in href:
+                datadict['搜尋詞'].append(kw)
+                datadict['結果標題'].append(elmt.text)
+                datadict['結果網址'].append(href)
+                datadict['結果名次'].append(str(cnt))
+            cnt+=1
+        except:
+            print('href2 exception')
+            traceback.print_exc()
+    
+    driver.quit()
+    time.sleep(60)
+    # db.close()
+    db_local.close()
+    return datadict
+

+ 73 - 3
web/main.py

@@ -7,7 +7,7 @@ import time
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.staticfiles import StaticFiles
 from pydantic import BaseModel
-from fastapi import FastAPI, Form
+from fastapi import FastAPI, Form, Response
 import subprocess
 import suggests
 from typing import Optional
@@ -18,11 +18,21 @@ from pyvis.network import Network
 import pickle
 import logging
 import threading
-import time
 import random
 import string
 from fastapi.responses import HTMLResponse
 from fastapi.responses import RedirectResponse
+import dataset
+from selenium import webdriver
+import traceback
+import time
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.common.by import By
+# import pymysql
+# pymysql.install_as_MySQLdb()
+
+driver = None
+
 def id_generator(size=6, chars=string.ascii_uppercase + string.digits):
     """Return ``size`` characters picked at random from ``chars``, joined into one string."""
     picked = []
     for _ in range(size):
         picked.append(random.choice(chars))
     return ''.join(picked)
 
@@ -39,7 +49,8 @@ app.add_middleware(
 
 db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/gtrends?charset=utf8mb4')
 
-app.mount("/web", StaticFiles(directory="static"), name="static")
+# app.mount("/web", StaticFiles(directory="static"), name="static")
+app.mount("/web", StaticFiles(directory="/Users/zooeytsai/kw_tools/web/static"), name="static")
 
 def thread_function(kw):
     global db
@@ -116,3 +127,62 @@ async def func_expand(kw: str = Form(...),kw2:Optional[str] = Form(None),kw3:Opt
     return RedirectResponse(url="/tree_list",status_code=302)
 #    return HTMLResponse('<html><head><meta http-equiv="refresh" content="0; URL="/tree_list" /></head></html>')
 
+    
+@app.post("/ranking/")
+async def ranking(kw: str = Form(...), domain:str = Form(...)):
+    global driver
+    options = webdriver.ChromeOptions()
+    options.add_argument("--no-sandbox")
+    options.add_argument("--disable-dev-shm-usage")
+    options.add_argument('--headless')
+    driver = webdriver.Chrome(options=options)
+    driver.set_window_size(950, 20000)
+
+    # db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+    db_local = dataset.connect('mysql://root:jondae350@localhost/ig_tags')
+    table = db_local['google_rank']
+
+    # escaped_search_term=urllib.parse.quote(term)
+    googleurl = 'https://www.google.com/?num=100'
+    driver.get(googleurl)
+    time.sleep(6)
+    send_kw_elmt = driver.find_element(By.XPATH,
+                                       '/html/body/div[1]/div[3]/form/div[1]/div[1]/div[1]/div/div[2]/input')
+    send_kw_elmt.send_keys(kw)
+    time.sleep(3)
+    send_kw_elmt.send_keys(Keys.ENTER)
+    time.sleep(6)
+    elmts = driver.find_elements_by_xpath("//div[@class='yuRUbf']/a")
+
+    cnt = 1
+    datadict = {'搜尋詞': [], '結果標題': [], '結果網址': [], '結果名次': []}
+
+    for elmt in elmts:
+        try:
+            href = elmt.get_attribute('href')
+            if domain in href:
+                datadict['搜尋詞'].append(kw)
+                datadict['結果標題'].append(elmt.text)
+                datadict['結果網址'].append(href)
+                datadict['結果名次'].append(str(cnt))
+            cnt += 1
+        except:
+            print('href2 exception')
+            traceback.print_exc()
+
+    driver.quit()
+    time.sleep(60)
+    # db.close()
+    db_local.close()
+    # return "ok"
+    # return RedirectResponse(url="/ranking_result",)
+    html = "<html><body><h2>"+str(datadict)+"</br></br>"
+    return html
+
+@app.get("/ranking_result/")
+async def tree_list():
+    html = "<table border='1'>"
+
+    # html += "<tr><td>" + c['kw'] + "</td>"
+    
+    return html

+ 14 - 0
web/static/ranking.html

@@ -0,0 +1,14 @@
+<html>
+    <body>
+        <form action="/ranking" method="post">
+            KW: <input type="text" id="kw" name="kw" value=""><br>
+            domain: <input type="text" id="domain" name="domain" value=""><br>
+            <input type="submit" value="開始">
+        </form>
+    <br>
+<br>
+
+
+
+</body>
+</html>