"""Keyword-research web service.

FastAPI endpoints that build Google-suggest keyword graphs, queue SERP
jobs, look up search-result rankings with headless Chrome, and cluster the
keywords of an uploaded CSV into an interactive pyvis graph.
"""
import codecs
import csv
import difflib
import logging
import os
import pickle
import random
import re
import shutil
import string
import subprocess
import sys
import threading
import time
import traceback
from html import escape as html_escape
from io import StringIO
from pathlib import Path
from pydoc import HTMLDoc
from tempfile import NamedTemporaryFile
from typing import Callable
from typing import Optional

import dataset
import networkx as nx
import pymysql
import suggests
from fastapi import FastAPI, Form, Response, File, UploadFile, Request
from fastapi import HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse, RedirectResponse, FileResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
from pyvis.network import Network
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

pymysql.install_as_MySQLdb()

# NOTE(review): credentials are embedded in the source; consider moving them
# to configuration. The values themselves are unchanged.
GTRENDS_DB_URL = 'mysql://choozmo:pAssw0rd@db.ptt.cx:3306/gtrends?charset=utf8mb4'
SEO_DB_URL = 'mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4'

# Directory that holds the static files and the generated "news" graphs.
WEB_ROOT = Path('/Users/mac/PycharmProjects/kw_tools/web')

# Shared Selenium driver handle, managed by restart_browser().
driver = None


def id_generator(size=6, chars=string.ascii_uppercase + string.digits):
    """Return a random identifier of ``size`` characters drawn from ``chars``."""
    return ''.join(random.choice(chars) for _ in range(size))


app = FastAPI()

origins = ["*"]

app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

db = dataset.connect(GTRENDS_DB_URL)

app.mount("/web", StaticFiles(directory=str(WEB_ROOT / "static")), name="static")
# app.mount("/web", StaticFiles(directory="/root/src/kw_tools/web/static"), name="static")


def thread_function(kw):
    """Build a two-level Google-suggest graph for the keywords in ``kw``.

    Each keyword is linked to its suggestions, and each suggestion to its own
    suggestions. The graph is saved as ``static/gs/<id>.html`` and the
    generated id is recorded in the ``gen_graph`` table.
    """
    print(kw)
    graph = nx.Graph()
    for k in kw:
        first_level = suggests.suggests.get_suggests(k, source='google')
        for sg in first_level['suggests']:
            graph.add_edge(k, sg, weight=1)
            print(sg)
            time.sleep(1)  # pause between suggest requests
            # Expand the suggestion itself. The original re-queried ``k``,
            # which only re-linked ``sg`` to the seed's own suggestions.
            second_level = suggests.suggests.get_suggests(sg, source='google')
            for elmt in second_level['suggests']:
                graph.add_edge(sg, elmt, weight=1)
    graph.remove_edges_from(list(nx.selfloop_edges(graph)))
    pyG = Network(height="750px", width="100%", bgcolor="#333333", font_color="white")
    pyG.from_nx(graph)
    graph_id = id_generator()
    db['gen_graph'].insert({'filename': str(graph_id), 'kw': str(kw)})
    pyG.save_graph('static/gs/' + str(graph_id) + '.html')


@app.get("/tree_list/", response_class=HTMLResponse)
async def tree_list():
    """Return an HTML table of the most recently generated graphs, newest first."""
    # The original opens its own connection here instead of using the
    # module-level ``db``; that is kept.
    db = dataset.connect(GTRENDS_DB_URL)
    # NOTE(review): the HTML tags of this page were lost in the reviewed
    # source; the markup below is a minimal reconstruction. Confirm against
    # the deployed page (e.g. whether the filename cell was a link).
    parts = [
        "<html><body><h2>清單</h2><p>請一分鐘後refresh</p>",
        "<table>",
    ]
    cursor = db.query('select filename,kw from gen_graph order by id desc')
    for cnt, c in enumerate(cursor, start=1):
        # Escape stored values: ``kw`` originates from user-submitted forms.
        parts.append(
            "<tr><td>" + html_escape(str(c['kw'])) + "</td><td>"
            + html_escape(str(c['filename'])) + "</td></tr>"
        )
        if cnt > 10:
            break
    parts.append("</table></body></html>")
    return "".join(parts)


@app.post("/proj_kw/", response_class=HTMLResponse)
async def proj_kw(proj: str = Form(...), kws: Optional[str] = Form(None)):
    """Queue one ``serp_jobs`` row per keyword for project ``proj``.

    ``kws`` is a single form string. The original iterated over it directly,
    inserting one row per character and failing when it was omitted.
    NOTE(review): keywords are assumed to be newline- or comma-separated;
    confirm against the submitting form.
    """
    keywords = [k.strip() for k in re.split(r'[\r\n,]+', kws or '') if k.strip()]
    if keywords:
        db = dataset.connect(SEO_DB_URL)
        table = db['serp_jobs']
        for kw in keywords:
            table.insert({'proj': proj, 'kw': kw})
    return "OK請稍後"


@app.post("/gen_tree/", response_class=HTMLResponse)
async def func_expand(kw: str = Form(...), kw2: Optional[str] = Form(None),
                      kw3: Optional[str] = Form(None), kw4: Optional[str] = Form(None)):
    """Start graph generation in a background thread and redirect to the list page."""
    kwlst = []
    if len(kw) > 1:
        kwlst.append(kw)
    kwlst.extend(extra for extra in (kw2, kw3, kw4) if extra is not None)
    worker = threading.Thread(target=thread_function, args=(kwlst,))
    worker.start()
    return RedirectResponse(url="/tree_list", status_code=302)


def restart_browser():
    """Quit any existing Chrome driver and start a fresh headless one.

    Returns the new driver, or ``None`` when Chrome could not be started.
    """
    global driver
    if driver is not None:
        print('closing')
        try:
            driver.quit()
        except Exception:
            traceback.print_exc()
        driver = None

    options = webdriver.ChromeOptions()
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument('--headless')
    options.add_argument("--incognito")
    try:
        driver = webdriver.Chrome(options=options)
        version = driver.capabilities['chrome']['chromedriverVersion'].split(' ')[0]
        print('這裡', version)
    except Exception:
        print('開啟失敗')
        traceback.print_exc()
        # Do not leak a browser that started but failed afterwards.
        if driver is not None:
            try:
                driver.quit()
            except Exception:
                traceback.print_exc()
        driver = None
        return None
    return driver


def _collect_rankings(browser, term, domain_name):
    """Search Google for ``term`` and collect the results whose URL contains ``domain_name``.

    Returns ``(datadict, number_of_result_links_found)``.
    """
    browser.get('https://www.google.com/?num=100')
    time.sleep(6)
    search_box = browser.find_element(
        By.XPATH, '/html/body/div[1]/div[3]/form/div[1]/div[1]/div[1]/div/div[2]/input')
    search_box.send_keys(term)
    time.sleep(3)
    search_box.send_keys(Keys.ENTER)
    time.sleep(6)
    # Selenium 4 removed find_elements_by_xpath; use the By-based API that
    # this code already uses for the search box.
    elmts = browser.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
    datadict = {'搜尋詞': [], '結果標題': [], '結果網址': [], '結果名次': []}
    cnt = 1
    for elmt in elmts:
        try:
            href = elmt.get_attribute('href')
            # Guard against a missing href or domain instead of relying on a
            # TypeError being swallowed below.
            if href and domain_name and domain_name in href:
                datadict['搜尋詞'].append(term)
                datadict['結果標題'].append(elmt.text)
                datadict['結果網址'].append(href)
                datadict['結果名次'].append(str(cnt))
            cnt += 1
        except Exception:
            print('href2 exception')
            traceback.print_exc()
    return datadict, len(elmts)


@app.post("/ranking/")
async def ranking(kw: str = Form(...), domain: str = Form(...),
                  kw2: Optional[str] = Form(None), domain2: Optional[str] = Form(None),
                  kw3: Optional[str] = Form(None), domain3: Optional[str] = Form(None),
                  kw4: Optional[str] = Form(None), domain4: Optional[str] = Form(None),
                  kw5: Optional[str] = Form(None), domain5: Optional[str] = Form(None)):
    """Look up where each domain ranks in Google results for its keyword.

    Returns the string form of a list with one result dict per submitted
    keyword/domain pair.
    NOTE(review): the Selenium calls and ``time.sleep`` block the event loop
    for the whole request; kept because the browser handle is shared state.
    """
    optional_pairs = [(kw2, domain2), (kw3, domain3), (kw4, domain4), (kw5, domain5)]
    kwlst = [(kw, domain)] + [pair for pair in optional_pairs if pair[0] is not None]

    result = []
    for term, domain_name in kwlst:
        browser = restart_browser()
        if browser is None:
            raise HTTPException(status_code=503, detail='browser could not be started')
        try:
            datadict, n_links = _collect_rankings(browser, term, domain_name)
        finally:
            browser.quit()
        result.append(datadict)
        print(domain_name)
        print(datadict)
        print('數量', n_links)
        time.sleep(90)  # wait between consecutive Google queries
    return f"{result}"


@app.get("/ranking_result/")
async def ranking_result():
    """Placeholder result page; currently returns an empty string."""
    # Renamed from ``tree_list`` so it no longer rebinds the /tree_list/ handler's name.
    # NOTE(review): any markup this string originally held was lost in the reviewed source.
    html = ""
    return html


# Word-frequency table filled by proc_row().
kwdict = {}
# Retained for backward compatibility; kwtree() builds its own per-request graph.
G = nx.Graph()


def gcm0(strings):
    """Group similar strings into clusters keyed by each cluster's first member.

    A stripped string joins the closest existing cluster key according to
    ``difflib.get_close_matches`` (cutoff 0.65); otherwise it starts a new cluster.
    """
    clusters = {}
    for text in (x.strip() for x in strings):
        match = difflib.get_close_matches(text, clusters.keys(), 8, 0.65)
        if match:
            clusters[match[0]].append(text)
        else:
            clusters[text] = [text]
    return clusters


def proc_row(row):
    """Count the space-separated tokens of ``row`` in the module-level ``kwdict``."""
    print('這裡', row)
    elmts = row.split(' ')
    print(elmts)
    for elmt in elmts:
        kwdict[elmt] = kwdict.get(elmt, 0) + 1


def save_upload_file_tmp(file: UploadFile) -> Path:
    """Copy an upload to a named temporary file and return its path.

    The upload's file object is always closed; the temporary file is not deleted.
    """
    try:
        suffix = Path(file.filename or '').suffix
        with NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            shutil.copyfileobj(file.file, tmp)
            tmp_path = Path(tmp.name)
    finally:
        file.file.close()
    return tmp_path


@app.post("/kwtree")
async def kwtree(file: UploadFile = File(...)):
    """Cluster the first-column keywords of an uploaded CSV and return the graph as HTML.

    The first CSV row is treated as a header and skipped. Every keyword is
    passed to ``proc_row``; distinct keywords are clustered with ``gcm0`` and
    cluster keys within a 2-gram distance of 12 are linked, at most one link
    per key.
    """
    from strsimpy.qgram import QGram

    reader = csv.reader(codecs.iterdecode(file.file, 'utf-8'), dialect=csv.excel)
    next(reader, None)  # skip the header row

    rowlst = []
    seen = set()
    for row in reader:
        if not row:
            continue  # blank line: no first column to read
        keyword = row[0]
        proc_row(keyword)
        # The original tested ``row not in rowlst`` (a list against strings),
        # which never filtered duplicates.
        if keyword not in seen:
            seen.add(keyword)
            rowlst.append(keyword)

    clusters = gcm0(rowlst)

    # Per-request graph: the module-level ``G`` accumulated edges from every
    # earlier upload.
    graph = nx.Graph()
    for key, members in clusters.items():
        for member in members:
            graph.add_edge(key, member, weight=1, label='')

    keys = list(clusters)
    qgram = QGram(2)
    linked = set()
    for k1 in keys:
        for k2 in keys:
            if k1 == k2 or k1 in linked or k2 in linked:
                continue
            if qgram.distance(k1, k2) <= 12:
                linked.add(k1)
                linked.add(k2)
                graph.add_edge(k1, k2, weight=1, label='')

    pyG = Network(height="100%", width="100%", bgcolor="#444444", font_color="white")
    pyG.set_options("""
const options = {
  "nodes" : {
    "font" : {
      "size" : "30",
      "color" : "#ffffff"
    }
  },
  "physics": {
    "forceAtlas2Based": {
      "springLength": 100
    },
    "maxVelocity": 150,
    "minVelocity": 0.28,
    "solver": "forceAtlas2Based"
  }
}
""")
    graph.remove_edges_from(list(nx.selfloop_edges(graph)))
    pyG.from_nx(graph)

    news_file = random.randint(0, 100)
    # Write to the same absolute path that is served. The original wrote
    # relative to the working directory with ``show`` (which also tries to
    # open a browser) and then read from this absolute location.
    out_path = WEB_ROOT / f'news{news_file}.html'
    pyG.save_graph(str(out_path))
    return FileResponse(str(out_path), media_type='text/html')