from pydoc import HTMLDoc
from fastapi import FastAPI
import dataset
import sys
import os
import time
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
from fastapi import FastAPI, Form, Response, File, UploadFile, Request
import subprocess
import suggests
from typing import Optional
# import networkx as nx
# import pyvis
# import time
# from pyvis.network import Network
import pickle
import logging
import threading
import random
import string
from fastapi.responses import HTMLResponse,RedirectResponse, FileResponse
import dataset
import traceback
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
import networkx as nx
from pyvis.network import Network
import csv
import sys
import codecs
import difflib
import pymysql
pymysql.install_as_MySQLdb()
from pathlib import Path
from tempfile import NamedTemporaryFile
from typing import Callable
import shutil
# import aiofiles
from io import StringIO
# Module-level Selenium WebDriver handle. restart_browser() quits whatever is
# stored here and replaces it with a freshly started browser (or None).
driver = None
def id_generator(size=6, chars=string.ascii_uppercase + string.digits):
    """Return a random string of ``size`` characters drawn from ``chars``.

    With the defaults the result is six characters taken from ``A-Z0-9``.
    The characters come from the ``random`` module, so the value is not
    suitable as a security token.
    """
    picked = []
    for _ in range(size):
        picked.append(random.choice(chars))
    return ''.join(picked)
# Application object; every route in this file is registered on it.
app = FastAPI()
# CORS is fully open: any origin, method and header, with credentials allowed.
# NOTE(review): wildcard origins together with allow_credentials=True is very
# permissive — confirm this is intended outside of development.
origins = ["*"]
app.add_middleware(
CORSMiddleware,
allow_origins=origins,
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# Module-level connection to the `gtrends` database; thread_function() writes
# its gen_graph rows through it.
# NOTE(review): the credentials are hard-coded in the URL — consider loading
# them from configuration or the environment.
db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/gtrends?charset=utf8mb4')
# Serve the static directory under /web. The path is absolute and specific to
# one machine; the commented-out line below is the alternative location.
app.mount("/web", StaticFiles(directory="/Users/mac/PycharmProjects/kw_tools/web/static"), name="static")
# app.mount("/web", StaticFiles(directory="/root/src/kw_tools/web/static"), name="static")
def thread_function(kw):
    """Build a two-level Google-suggest graph for ``kw`` and save it as HTML.

    Started in a background thread by ``func_expand``. For every seed keyword
    the Google suggestions are fetched and linked to the seed; each suggestion
    is then expanded once more and linked to its own suggestions. The graph is
    rendered with pyvis to ``static/gs/<id>.html`` and the id/keyword pair is
    recorded in the ``gen_graph`` table of the module-level ``db``.

    Args:
        kw: list of seed keyword strings.
    """
    print(kw)
    graph = nx.Graph()
    for seed in kw:
        first_level = suggests.suggests.get_suggests(seed, source='google')
        for suggestion in first_level['suggests']:
            graph.add_edge(seed, suggestion, weight=1)
            print(suggestion)
            # Pause between lookups (presumably to throttle the suggest endpoint).
            time.sleep(1)
            # Expand the suggestion itself, not the seed again: querying the
            # seed here would only link every suggestion to its siblings.
            second_level = suggests.suggests.get_suggests(suggestion, source='google')
            for expanded in second_level['suggests']:
                graph.add_edge(suggestion, expanded, weight=1)
    # Materialize the self-loop list before removing edges from the graph.
    graph.remove_edges_from(list(nx.selfloop_edges(graph)))

    net = Network(height="750px", width="100%", bgcolor="#333333", font_color="white")
    net.from_nx(graph)
    graph_id = id_generator()
    # Write the file first so a failed save never leaves a database row that
    # points at a graph which does not exist.
    net.save_graph('static/gs/' + graph_id + '.html')
    db['gen_graph'].insert({'filename': graph_id, 'kw': str(kw)})
@app.get("/tree_list/",response_class=HTMLResponse)
async def tree_list():
    """Render an HTML page listing the most recently generated graphs.

    Reads ``filename`` and ``kw`` from the ``gen_graph`` table, newest first,
    and shows at most eleven rows (the loop stops once the row counter
    exceeds ten). Database values are HTML-escaped before being embedded.

    Returns:
        The page as a string; FastAPI wraps it in an ``HTMLResponse``.
    """
    # Imported locally: the file has no top-level import for html.escape.
    from html import escape

    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/gtrends?charset=utf8mb4')
    cursor = db.query('select filename,kw from gen_graph order by id desc')

    parts = [
        "<html><body><h2>清單</h2>",
        "<p>請一分鐘後refresh</p>",
        "<table border='1'>",
    ]
    for shown, record in enumerate(cursor, start=1):
        keyword = escape(str(record['kw']))
        filename = escape(str(record['filename']))
        # NOTE(review): the link assumes the 'static/gs/' directory written by
        # thread_function is the one mounted at /web — confirm the deployment.
        parts.append(
            f"<tr><td>{keyword}</td>"
            f"<td><a href='/web/gs/{filename}.html'>{filename}</a></td></tr>"
        )
        if shown > 10:
            break
    parts.append("</table></body></html>")
    return "".join(parts)
@app.post("/proj_kw/",response_class=HTMLResponse)
async def proj_kw(proj: str = Form(...),kws:Optional[str] = Form(None)):
    """Queue keywords of a project as rows in the ``serp_jobs`` table.

    Args:
        proj: project name stored with every keyword.
        kws: keywords submitted as a single form string; may be omitted.

    Returns:
        A short confirmation string.
    """
    # ``kws`` is one string (or None). Iterating it directly would insert one
    # row per character and fail on None, so split it into keywords first.
    # NOTE(review): assumes one keyword per line — confirm against the form
    # that posts to this endpoint.
    keywords = [line.strip() for line in (kws or "").splitlines() if line.strip()]
    if keywords:
        db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
        table = db['serp_jobs']
        for kw in keywords:
            table.insert({'proj': proj, 'kw': kw})
    return "OK請稍後"
#response_class=RedirectResponse
@app.post("/gen_tree/",response_class=HTMLResponse)
async def func_expand(kw: str = Form(...),kw2:Optional[str] = Form(None),kw3:Optional[str] = Form(None),kw4:Optional[str] = Form(None) ):
    """Start graph generation for up to four keywords and redirect to the list.

    ``kw`` is used only when it is longer than one character; ``kw2``..``kw4``
    are used whenever they were submitted. The work itself runs in a
    background thread (``thread_function``), so the redirect is returned
    immediately.
    """
    keywords = [kw] if len(kw) > 1 else []
    keywords.extend(extra for extra in (kw2, kw3, kw4) if extra is not None)

    worker = threading.Thread(target=thread_function, args=(keywords,))
    worker.start()
    return RedirectResponse(url="/tree_list", status_code=302)
def restart_browser():
    """Quit any running browser and start a fresh headless Chrome.

    The new driver is stored in the module-level ``driver`` and returned.

    Returns:
        The new WebDriver, or ``None`` when Chrome could not be started or
        its chromedriver version could not be read.
    """
    global driver
    if driver is not None:
        print('closing')
        try:
            driver.quit()
        except Exception:
            # A stale or already-closed session must not block the restart.
            traceback.print_exc()
        driver = None

    try:
        options = webdriver.ChromeOptions()
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")
        options.add_argument('--headless')
        options.add_argument("--incognito")
        driver = webdriver.Chrome(options=options)
    except Exception:
        print('開啟失敗')
        traceback.print_exc()
        driver = None
        return None

    try:
        version = driver.capabilities['chrome']['chromedriverVersion'].split(' ')[0]
        print('這裡', version)
    except Exception:
        traceback.print_exc()
        # Callers receive None here, so do not leave the browser running.
        try:
            driver.quit()
        except Exception:
            traceback.print_exc()
        driver = None
        return None
    return driver
@app.post("/ranking/")
async def ranking(kw: str = Form(...), domain:str = Form(...),kw2:Optional[str] = Form(None),domain2:Optional[str] = Form(None),kw3:Optional[str] = Form(None),domain3:Optional[str] = Form(None),kw4:Optional[str] = Form(None),domain4:Optional[str] = Form(None),kw5:Optional[str] = Form(None),domain5:Optional[str] = Form(None)):
    """Look up where each domain ranks in Google results for its keyword.

    For every submitted (keyword, domain) pair a fresh browser is started,
    the keyword is searched on Google, and every result link whose URL
    contains the domain is recorded together with its position.

    Returns:
        ``str()`` of a list with one dict per pair; each dict maps the four
        column names to parallel lists of matches.

    Raises:
        RuntimeError: if the browser could not be started.
    """
    # NOTE(review): time.sleep and the Selenium calls block inside this async
    # handler, so the server handles nothing else meanwhile — consider a
    # plain ``def`` so FastAPI runs it in its thread pool.
    pairs = [[kw, domain]]
    optional_pairs = ((kw2, domain2), (kw3, domain3), (kw4, domain4), (kw5, domain5))
    for extra_kw, extra_domain in optional_pairs:
        if extra_kw is not None:
            pairs.append([extra_kw, extra_domain])

    result = []
    for search_term, domain_name in pairs:
        driver = restart_browser()
        if driver is None:
            raise RuntimeError('could not start Chrome for the ranking lookup')
        try:
            driver.get('https://www.google.com/?num=100')
            time.sleep(6)
            # NOTE(review): absolute XPath into Google's page; breaks
            # whenever the page layout changes.
            search_box = driver.find_element(
                By.XPATH,
                '/html/body/div[1]/div[3]/form/div[1]/div[1]/div[1]/div/div[2]/input')
            search_box.send_keys(search_term)
            time.sleep(3)
            search_box.send_keys(Keys.ENTER)
            time.sleep(6)
            # find_elements_by_xpath was removed in Selenium 4.3; use the
            # same By-based API as the find_element call above.
            links = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
            cnt = 1
            datadict = {'搜尋詞': [], '結果標題': [], '結果網址': [], '結果名次': []}
            for link in links:
                try:
                    href = link.get_attribute('href')
                    if domain_name in href:
                        datadict['搜尋詞'].append(search_term)
                        datadict['結果標題'].append(link.text)
                        datadict['結果網址'].append(href)
                        datadict['結果名次'].append(str(cnt))
                    cnt += 1
                except Exception:
                    print('href2 exception')
                    traceback.print_exc()
            result.append(datadict)
            print(domain_name)
            print(datadict)
        finally:
            # Always release the browser, including when the search fails.
            driver.quit()
        print('數量', len(links))
        # Long pause between searches (presumably to avoid being rate-limited).
        time.sleep(90)
    return f"{result}"
@app.get("/ranking_result/")
async def tree_list():
    """Placeholder for the ranking result page; always returns an empty string.

    NOTE(review): this definition reuses the name ``tree_list`` of the
    ``/tree_list/`` handler defined earlier in the file. Both routes stay
    registered, but the module-level name now refers to this function —
    consider renaming it.
    """
    return ""
# Running count of space-separated tokens, updated by proc_row().
kwdict={}
# Module-level graph that kwtree() fills with keyword edges and renders.
G = nx.Graph()
def gcm0(strings):
    """Greedily group similar strings.

    Each string is stripped and compared with the keys of the groups built so
    far using ``difflib.get_close_matches`` (cutoff 0.65). It joins the group
    of the best-matching key, or starts a new group keyed by itself.

    Returns:
        dict mapping each group key to the list of its members; the key is
        always the first member.
    """
    groups = {}
    for raw in strings:
        item = raw.strip()
        close = difflib.get_close_matches(item, groups.keys(), 8, 0.65)
        if not close:
            groups[item] = [item]
            continue
        groups[close[0]].append(item)
    return groups
def proc_row(row):
    """Count the space-separated tokens of ``row`` in the module-level ``kwdict``.

    The row and its token list are printed for debugging.
    """
    print('這裡', row)
    tokens = row.split(' ')
    print(tokens)
    for token in tokens:
        seen = kwdict.get(token)
        kwdict[token] = 1 if seen is None else seen + 1
def save_upload_file_tmp(file: UploadFile) -> Path:
    """Copy an uploaded file to a named temporary file and return its path.

    The temporary file keeps the extension of the upload's filename and is
    created with ``delete=False``, so it is not removed automatically.
    The upload's underlying file object is always closed.

    Args:
        file: the upload; only ``filename`` and ``file`` are used.

    Returns:
        Path of the temporary copy.
    """
    try:
        # The filename can be missing; fall back to "no extension" instead of
        # failing inside Path(None).
        suffix = Path(file.filename or "").suffix
        with NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            shutil.copyfileobj(file.file, tmp)
        tmp_path = Path(tmp.name)
    finally:
        file.file.close()
    return tmp_path
# vis.js options handed to pyvis when rendering the keyword tree.
_KWTREE_VIS_OPTIONS = """
const options = {
"nodes" : {
"font" : {
"size" : "30",
"color" : "#ffffff"
}
},
"physics": {
"forceAtlas2Based": {
"springLength": 100
},
"maxVelocity": 150,
"minVelocity": 0.28,
"solver": "forceAtlas2Based"
}
}
"""


@app.post("/kwtree")
async def kwtree(file: UploadFile = File(...)):
    """Cluster the keywords of an uploaded CSV and return them as a graph page.

    The first column of every row after the header is read as a keyword.
    Keywords are grouped with ``gcm0``; every member is linked to its group
    key, and group keys whose 2-gram distance is at most 12 are linked in
    pairs (each key takes part in at most one such pair). The graph is
    rendered with pyvis and the HTML file is returned.

    Args:
        file: uploaded UTF-8 CSV file with a header row.

    Returns:
        ``FileResponse`` serving the generated HTML file.
    """
    reader = csv.reader(codecs.iterdecode(file.file, 'utf-8'), dialect=csv.excel)
    next(reader, None)  # skip the header row

    # A dict used as an ordered set: the first occurrence of a keyword wins.
    unique_keywords = {}
    for row in reader:
        if not row:
            # Blank CSV lines have no first column.
            continue
        proc_row(row[0])
        unique_keywords.setdefault(row[0], None)

    clusters = gcm0(list(unique_keywords))

    # G is shared module state; start empty so edges from earlier uploads do
    # not leak into this response.
    G.clear()
    keys = list(clusters)
    for key, members in clusters.items():
        for member in members:
            G.add_edge(key, member, weight=1, label='')

    from strsimpy.qgram import QGram
    qgram = QGram(2)
    linked = set()
    for k1 in keys:
        for k2 in keys:
            if k1 == k2 or k1 in linked or k2 in linked:
                continue
            if qgram.distance(k1, k2) <= 12:
                linked.update((k1, k2))
                G.add_edge(k1, k2, weight=1, label='')

    pyG = Network(height="100%", width="100%", bgcolor="#444444", font_color="white")
    pyG.set_options(_KWTREE_VIS_OPTIONS)
    # Every group key is also its own first member, which adds a self-loop;
    # materialize the list before removing edges from the graph.
    G.remove_edges_from(list(nx.selfloop_edges(G)))
    pyG.from_nx(G)

    # NOTE(review): only 101 possible file names, so concurrent uploads can
    # overwrite each other's output.
    news_file = random.randint(0, 100)
    # Write to the same absolute path that is served, so the response does
    # not depend on the process working directory.
    output_path = Path('/Users/mac/PycharmProjects/kw_tools/web') / f'news{news_file}.html'
    # save_graph only writes the file; show() would also try to open it in a
    # browser on the server.
    pyG.save_graph(str(output_path))
    return FileResponse(str(output_path), media_type='text/html')