import time
import threading
import random
import string
import traceback
from typing import Optional

import dataset
import networkx as nx
import suggests
from fastapi import FastAPI, Form
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from fastapi.responses import HTMLResponse, RedirectResponse
from pyvis.network import Network
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By

# import pymysql
# pymysql.install_as_MySQLdb()

driver = None


def id_generator(size=6, chars=string.ascii_uppercase + string.digits):
    return ''.join(random.choice(chars) for _ in range(size))


app = FastAPI()

origins = ["*"]
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/gtrends?charset=utf8mb4')

# app.mount("/web", StaticFiles(directory="static"), name="static")
app.mount("/web", StaticFiles(directory="/Users/zooeytsai/kw_tools/web/static"), name="static")
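
# To serve the app locally (the module name main.py below is an assumption;
# substitute the actual filename):
#   uvicorn main:app --host 0.0.0.0 --port 8000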


def thread_function(kw):
    """Build a two-level Google-suggest graph for the given keywords and save it as an HTML page."""
    global db
    print(kw)
    G = nx.Graph()
    for k in kw:
        # First level: suggestions for the keyword itself.
        s = suggests.suggests.get_suggests(k, source='google')
        for sg in s['suggests']:
            G.add_edge(k, sg, weight=1)
            print(sg)
            time.sleep(1)
            # Second level: expand each suggestion itself (the original re-queried k here,
            # which attached k's own suggestions to every sg).
            s2 = suggests.suggests.get_suggests(sg, source='google')
            for elmt in s2['suggests']:
                G.add_edge(sg, elmt, weight=1)
    # G.remove_nodes_from(list(nx.isolates(G)))
    G.remove_edges_from(list(nx.selfloop_edges(G)))
    # pickle.dump(G, open("gs2.p", "wb"))
    pyG = Network(height="750px", width="100%", bgcolor="#333333", font_color="white")
    pyG.from_nx(G)
    graph_id = id_generator()
    db['gen_graph'].insert({'filename': str(graph_id), 'kw': str(kw)})
    # pyG.save_graph('gstest')
    # pyG.show('static/gs/' + str(graph_id) + '.html')
    pyG.save_graph('static/gs/' + str(graph_id) + '.html')
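
# Note: the graph is written to the relative path static/gs/<id>.html, so the
# process needs to run from the directory that contains static/ (the same tree
# mounted at /web above); /tree_list/ then links to it as /web/gs/<id>.html.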


@app.get("/tree_list/", response_class=HTMLResponse)
async def tree_list():
    # global db
    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/gtrends?charset=utf8mb4')
    html = "<html><body><h2>List</h2><br>Please refresh again after about one minute<br><br>"
    html += "<table border='1'>"
    cursor = db.query('select filename,kw from gen_graph order by id desc')
    cnt = 0
    for c in cursor:
        html += "<tr><td>" + c['kw'] + "</td>"
        html += "<td><a href='/web/gs/" + c['filename'] + ".html'>" + c['filename'] + "</a></td></tr>"
        cnt += 1
        if cnt > 10:
            break
    html += "</table></body></html>"
    return html


@app.post("/proj_kw/", response_class=HTMLResponse)
async def proj_kw(proj: str = Form(...), kws: Optional[str] = Form(None)):
    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
    table = db['serp_jobs']
    # kws is assumed to be a comma-separated string; the original code iterated
    # over the raw string, which inserted one character per row.
    for kw in (kws or '').split(','):
        table.insert({'proj': proj, 'kw': kw.strip()})
    return "OK, please wait a moment"


# response_class=RedirectResponse
@app.post("/gen_tree/", response_class=HTMLResponse)
async def func_expand(kw: str = Form(...), kw2: Optional[str] = Form(None), kw3: Optional[str] = Form(None), kw4: Optional[str] = Form(None)):
    kwlst = []
    if len(kw) > 1:
        kwlst.append(kw)
    if kw2 is not None:
        kwlst.append(kw2)
    if kw3 is not None:
        kwlst.append(kw3)
    if kw4 is not None:
        kwlst.append(kw4)
    # Build the suggest graph in a background thread so the request can return immediately.
    x = threading.Thread(target=thread_function, args=(kwlst,))
    x.start()
    # return "ok"
    return RedirectResponse(url="/tree_list", status_code=302)
    # return HTMLResponse('<html><head><meta http-equiv="refresh" content="0; URL="/tree_list" /></head></html>')

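# Example call (host and port are assumptions; only kw is required, kw2-kw4 are optional):
#   curl -X POST http://localhost:8000/gen_tree/ -F 'kw=coffee' -F 'kw2=espresso'
# The response redirects to /tree_list/, where the finished graph appears once
# the background thread completes.
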

@app.post("/ranking/")
async def ranking(kw: str = Form(...), domain: str = Form(...)):
    global driver
    options = webdriver.ChromeOptions()
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument('--headless')
    driver = webdriver.Chrome(options=options)
    driver.set_window_size(950, 20000)
    # db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
    db_local = dataset.connect('mysql://root:jondae350@localhost/ig_tags')
    table = db_local['google_rank']
    # escaped_search_term=urllib.parse.quote(term)
    googleurl = 'https://www.google.com/?num=100'
    driver.get(googleurl)
    time.sleep(6)
    send_kw_elmt = driver.find_element(By.XPATH,
        '/html/body/div[1]/div[3]/form/div[1]/div[1]/div[1]/div/div[2]/input')
    send_kw_elmt.send_keys(kw)
    time.sleep(3)
    send_kw_elmt.send_keys(Keys.ENTER)
    time.sleep(6)
    # Selenium 4 removed find_elements_by_xpath; use the By API as above.
    elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
    cnt = 1
    datadict = {'search term': [], 'result title': [], 'result url': [], 'result rank': []}
    for elmt in elmts:
        try:
            href = elmt.get_attribute('href')
            if domain in href:
                datadict['search term'].append(kw)
                datadict['result title'].append(elmt.text)
                datadict['result url'].append(href)
                datadict['result rank'].append(str(cnt))
            # cnt tracks the absolute position in the result list, so it advances for every result.
            cnt += 1
        except Exception:
            print('href2 exception')
            traceback.print_exc()
    driver.quit()
    time.sleep(60)
    # db.close()
    db_local.close()
    # return "ok"
    # return RedirectResponse(url="/ranking_result",)
    html = "<html><body><h2>" + str(datadict) + "</h2></body></html>"
    return html
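
# Example call (host and port are assumptions; the request blocks while Selenium
# scrapes Google and through the 60-second sleep above):
#   curl -X POST http://localhost:8000/ranking/ -F 'kw=some keyword' -F 'domain=example.com'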


@app.get("/ranking_result/")
async def ranking_result():
    # Placeholder: result rows are not rendered yet (the original reused the
    # name tree_list, which shadowed the handler defined above).
    html = "<table border='1'>"
    # html += "<tr><td>" + c['kw'] + "</td>"
    html += "</table>"
    return html