123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224 |
- from pydoc import HTMLDoc
- from fastapi import FastAPI
- import dataset
- import sys
- import os
- import time
- from fastapi.middleware.cors import CORSMiddleware
- from fastapi.staticfiles import StaticFiles
- from pydantic import BaseModel
- from fastapi import FastAPI, Form, Response
- import subprocess
- import suggests
- from typing import Optional
- import networkx as nx
- import pyvis
- import time
- from pyvis.network import Network
- import pickle
- import logging
- import threading
- import random
- import string
- from fastapi.responses import HTMLResponse
- from fastapi.responses import RedirectResponse
- import dataset
- from selenium import webdriver
- import traceback
- import time
- from selenium.webdriver.common.keys import Keys
- from selenium.webdriver.common.by import By
- from selenium.webdriver.chrome.service import Service
- # import pymysql
- # pymysql.install_as_MySQLdb()
# Module-wide Selenium WebDriver handle. It starts out empty and is rebound by
# restart_browser(), which declares it ``global``.
driver = None
def id_generator(size=6, chars=string.ascii_uppercase + string.digits):
    """Return a random identifier made of ``size`` characters from ``chars``.

    Every character is drawn independently with ``random.choice``.  By default
    the alphabet is the uppercase ASCII letters followed by the decimal
    digits.  The ``random`` module is not suitable for security tokens.
    """
    picked = []
    for _ in range(size):
        picked.append(random.choice(chars))
    return ''.join(picked)
# Application object; the route decorators further down register on it.
app = FastAPI()

# CORS policy: every origin, method and header is accepted, with credentials.
# NOTE(review): this is as permissive as CORS gets - confirm it is intended.
origins = ["*"]
_cors_settings = {
    "allow_origins": origins,
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_settings)

# Module-level connection to the ``gtrends`` database.
# NOTE(review): the user name and password are embedded in the URL below;
# consider loading them from configuration instead.
db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/gtrends?charset=utf8mb4')

# Static files are exposed under /web from an absolute directory.
# Earlier relative variant, kept for reference:
# app.mount("/web", StaticFiles(directory="static"), name="static")
_static_files = StaticFiles(directory="/root/src/kw_tools/web/static")
app.mount("/web", _static_files, name="static")
def thread_function(kw):
    """Build a two-level Google-suggestion graph for ``kw`` and publish it.

    For every seed keyword the suggestions are fetched and linked to the seed.
    Each suggestion is then looked up once more and linked to its own
    suggestions.  Self-loops are dropped, the graph is rendered with pyvis to
    ``static/gs/<id>.html`` and a row pointing at that file is inserted into
    the ``gen_graph`` table of the module-level ``db``.

    Args:
        kw: iterable of seed keyword strings.
    """
    print(kw)
    graph = nx.Graph()
    for seed in kw:
        first_level = suggests.suggests.get_suggests(seed, source='google')
        for suggestion in first_level['suggests']:
            graph.add_edge(seed, suggestion, weight=1)
            print(suggestion)
            # Pause between consecutive suggestion lookups.
            time.sleep(1)
            # Look up the suggestion itself (not the seed again) so that the
            # second level holds the suggestion's own related terms rather
            # than linking every suggestion to its siblings.
            second_level = suggests.suggests.get_suggests(suggestion, source='google')
            for related in second_level['suggests']:
                graph.add_edge(suggestion, related, weight=1)
    # G.remove_nodes_from(list(nx.isolates(G)))
    graph.remove_edges_from(list(nx.selfloop_edges(graph)))
    net = Network(height="750px", width="100%", bgcolor="#333333", font_color="white")
    net.from_nx(graph)
    # Avoid shadowing the builtin ``id``.
    graph_id = str(id_generator())
    # Write the page first so the listing never links to a file that does not
    # exist yet.
    net.save_graph('static/gs/' + graph_id + '.html')
    db['gen_graph'].insert({'filename': graph_id, 'kw': str(kw)})
@app.get("/tree_list/",response_class=HTMLResponse)
async def tree_list():
    """Render an HTML table of the most recently generated suggestion graphs.

    Reads ``filename`` and ``kw`` from ``gen_graph`` (newest first) over a
    connection opened for this request and emits at most 11 rows, each with
    the keyword text and a link to ``/web/gs/<filename>.html``.

    Returns:
        The page as a single HTML string.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    # global db
    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/gtrends?charset=utf8mb4')
    parts = [
        "<html><body><h2>清單</h2></br>請一分鐘後refresh </br></br>",
        "<table border='1'>",
    ]
    cursor = db.query('select filename,kw from gen_graph order by id desc')
    for shown, row in enumerate(cursor, start=1):
        # Keywords come from user-submitted forms, so escape everything that
        # is interpolated into markup (quotes included, for the attribute).
        kw_text = escape(str(row['kw']))
        filename = escape(str(row['filename']))
        parts.append("<tr><td>" + kw_text + "</td>")
        parts.append("<td><a href='/web/gs/" + filename + ".html'>" + filename + "</a></td></tr>")
        # Same cut-off as before: stop once the 11th row has been written.
        if shown > 10:
            break
    parts.append("</table></body></html>")
    return "".join(parts)
@app.post("/proj_kw/",response_class=HTMLResponse)
async def proj_kw(proj: str = Form(...),kws:Optional[str] = Form(None)):
    """Queue one ``serp_jobs`` row per keyword for project ``proj``.

    Args:
        proj: project name stored with every queued keyword.
        kws: keywords as one text field; may be omitted.

    Returns:
        A short acknowledgement string.
    """
    # ``kws`` is a single string: looping over it directly would insert one
    # row per character and fail outright when the field is missing.
    # NOTE(review): assumes keywords are separated by newlines or commas -
    # confirm against the form that posts here.
    keywords = []
    if kws:
        candidates = kws.replace(',', '\n').splitlines()
        keywords = [item.strip() for item in candidates if item.strip()]
    if keywords:
        db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
        table = db['serp_jobs']
        for kw in keywords:
            table.insert({'proj': proj, 'kw': kw})
    return "OK請稍後"
- #response_class=RedirectResponse
@app.post("/gen_tree/",response_class=HTMLResponse)
async def func_expand(kw: str = Form(...),kw2:Optional[str] = Form(None),kw3:Optional[str] = Form(None),kw4:Optional[str] = Form(None) ):
    """Start a background graph build for the submitted keywords.

    ``kw`` is used only when it is longer than one character; ``kw2`` to
    ``kw4`` are used whenever they were supplied.  The collected list is
    handed to ``thread_function`` on a new thread and the client is
    redirected to the listing page straight away.
    """
    kwlst = [kw] if len(kw) > 1 else []
    kwlst.extend(extra for extra in (kw2, kw3, kw4) if extra is not None)
    worker = threading.Thread(target=thread_function, args=(kwlst,))
    worker.start()
    # return "ok"
    return RedirectResponse(url="/tree_list",status_code=302)
    # return HTMLResponse('<html><head><meta http-equiv="refresh" content="0; URL="/tree_list" /></head></html>')
def restart_browser():
    """Restart the ``tiny1`` container and attach a new Chrome WebDriver.

    Any driver held in the module-level ``driver`` is quit first.  The new
    driver attaches to the browser's debugger address ``127.0.0.1:9923``.

    Returns:
        The new driver, which is also stored in the module-level ``driver``,
        or ``None`` when it could not be created.
    """
    global driver
    # os.system() signals failure through its return value and never raises,
    # so the exit status is what decides whether to retry.  The number of
    # attempts is bounded; afterwards the function carries on best-effort.
    for _ in range(3):
        if os.system('docker container restart tiny1') == 0:
            time.sleep(1)
            break
        time.sleep(10)
    if driver is not None:
        print('closing')
        try:
            driver.quit()
        except Exception:
            # The old session may already be dead after the container
            # restart; that must not prevent creating a new one.
            traceback.print_exc()
        driver = None
    try:
        options = webdriver.ChromeOptions()
        options.add_argument("--no-sandbox")
        #options.add_argument("--disable-dev-shm-usage")
        options.add_argument('--headless')
        options.add_experimental_option("debuggerAddress", "127.0.0.1:9923")
        options.add_argument("--incognito")
        driver = webdriver.Chrome(options=options,executable_path='/root/driver/chromedriver')
        #driver = webdriver.Remote(command_executor='http://127.0.0.1:'+str(portnum)+'/wd/hub',options=options)
    except Exception:
        print('開啟失敗')
        traceback.print_exc()
        driver = None
        return None
    return driver
-
@app.post("/ranking/")
async def ranking(kw: str = Form(...), domain:str = Form(...),kw2:Optional[str] = Form(None),domain2:Optional[str] = Form(None),kw3:Optional[str] = Form(None),domain3:Optional[str] = Form(None),kw4:Optional[str] = Form(None),domain4:Optional[str] = Form(None),kw5:Optional[str] = Form(None),domain5:Optional[str] = Form(None)):
    """Search Google for each keyword and report where its domain appears.

    Up to five ``(keyword, domain)`` pairs are accepted; pairs 2-5 are used
    only when their keyword was supplied.  For every pair the browser is
    restarted, the keyword is typed into Google's search box and each result
    link whose URL contains the domain is recorded with its title, URL and
    1-based position among the result links.

    Returns:
        An HTML string embedding the list of per-keyword result dicts.

    NOTE(review): the Selenium calls and ``time.sleep`` waits are blocking
    even though this handler is ``async``.
    """
    kwlst = [[kw, domain]]
    optional_pairs = ((kw2, domain2), (kw3, domain3), (kw4, domain4), (kw5, domain5))
    for extra_kw, extra_domain in optional_pairs:
        if extra_kw is not None:
            kwlst.append([extra_kw, extra_domain])

    result = []
    for search_term, domain_name in kwlst:
        datadict = {'搜尋詞': [], '結果標題': [], '結果網址': [], '結果名次': []}
        driver = restart_browser()
        if driver is None:
            # restart_browser() returns None when no driver could be created;
            # report an empty result for this keyword instead of crashing.
            print('browser unavailable, skipping', search_term)
            result.append(datadict)
            continue
        try:
            # escaped_search_term=urllib.parse.quote(term)
            googleurl = 'https://www.google.com/?num=100'
            driver.get(googleurl)
            time.sleep(6)
            send_kw_elmt = driver.find_element(By.XPATH,
                '/html/body/div[1]/div[3]/form/div[1]/div[1]/div[1]/div/div[2]/input')
            send_kw_elmt.send_keys(search_term)
            time.sleep(3)
            send_kw_elmt.send_keys(Keys.ENTER)
            time.sleep(6)
            # Same By-based locator API as the find_element call above.
            elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
            for rank, elmt in enumerate(elmts, start=1):
                try:
                    href = elmt.get_attribute('href')
                    # A missing href or domain simply means "no match".
                    if href and domain_name is not None and domain_name in href:
                        datadict['搜尋詞'].append(search_term)
                        datadict['結果標題'].append(elmt.text)
                        datadict['結果網址'].append(href)
                        datadict['結果名次'].append(str(rank))
                except Exception:
                    print('href2 exception')
                    traceback.print_exc()
            result.append(datadict)
            print(domain_name)
            print(datadict)
        finally:
            # Always release the browser (the old code called quit() on an
            # undefined name here).
            driver.quit()
        print('數量',len(elmts))
        # Long pause between consecutive searches.
        time.sleep(90)
    # return "ok"
    # return RedirectResponse(url="/ranking_result",)
    html = f"<html><body>{result}</body></html>"
    return html
@app.get("/ranking_result/")
async def tree_list():
    """Placeholder for the ranking result page.

    Currently returns nothing but the opening tag of a bordered table.

    NOTE(review): this function shares its name with the ``/tree_list/``
    handler defined earlier in the file and rebinds that module-level name.
    """
    # html += "<tr><td>" + c['kw'] + "</td>"
    return "<table border='1'>"
|