123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338 |
- from pydoc import HTMLDoc
- from fastapi import FastAPI
- import dataset
- import sys
- import os
- import time
- from fastapi.middleware.cors import CORSMiddleware
- from fastapi.staticfiles import StaticFiles
- from pydantic import BaseModel
- from fastapi import FastAPI, Form, Response, File, UploadFile, Request
- import subprocess
- import suggests
- from typing import Optional
- # import networkx as nx
- # import pyvis
- # import time
- # from pyvis.network import Network
- import pickle
- import logging
- import threading
- import random
- import string
- from fastapi.responses import HTMLResponse,RedirectResponse, FileResponse
- import dataset
- import traceback
- import time
- from selenium import webdriver
- from selenium.webdriver.common.keys import Keys
- from selenium.webdriver.common.by import By
- from selenium.webdriver.chrome.service import Service
- import networkx as nx
- from pyvis.network import Network
- import csv
- import sys
- import codecs
- import difflib
- import pymysql
- pymysql.install_as_MySQLdb()
- from pathlib import Path
- from tempfile import NamedTemporaryFile
- from typing import Callable
- import shutil
- # import aiofiles
- from io import StringIO
# Module-level handle to the current Selenium WebDriver; restart_browser()
# declares it ``global`` and rebinds it. ``None`` means no browser is running.
driver = None
def id_generator(size=6, chars=string.ascii_uppercase + string.digits):
    """Return a random identifier made of ``size`` characters from ``chars``.

    Args:
        size: Number of characters to generate (6 by default).
        chars: Alphabet to draw from; uppercase ASCII letters and digits
            by default.

    Returns:
        The generated string; empty when ``size`` is 0.
    """
    picked = []
    for _ in range(size):
        # One independent draw per output position.
        picked.append(random.choice(chars))
    return ''.join(picked)
# FastAPI application instance; the route decorators in this file register
# their handlers on it.
app = FastAPI()
# NOTE(review): wildcard origins combined with allow_credentials=True is a
# very permissive CORS policy -- confirm this is intended.
origins = ["*"]
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# The connection URL (which embeds credentials) and the static directory are
# deployment specific, so both can be overridden through the environment.
# The defaults are the values that used to be hard-coded here, which keeps
# existing deployments working unchanged.
db = dataset.connect(
    os.environ.get(
        'KW_TOOLS_GTRENDS_DB_URL',
        'mysql://choozmo:pAssw0rd@db.ptt.cx:3306/gtrends?charset=utf8mb4',
    )
)
# The file previously carried a commented-out alternative mount for
# /root/src/kw_tools/web/static; set KW_TOOLS_STATIC_DIR to select it.
app.mount(
    "/web",
    StaticFiles(
        directory=os.environ.get(
            'KW_TOOLS_STATIC_DIR',
            "/Users/mac/PycharmProjects/kw_tools/web/static",
        )
    ),
    name="static",
)
def thread_function(kw):
    """Build a two-level Google-suggest graph for ``kw`` and save it as HTML.

    For every seed keyword the suggest service is queried, then each returned
    suggestion is queried in turn, and all keyword/suggestion pairs become
    edges of an undirected graph. The graph is rendered with pyvis into
    ``static/gs/<id>.html`` and a row describing it is inserted into the
    ``gen_graph`` table of the module-level ``db``.

    Args:
        kw: Iterable of seed keyword strings.
    """
    print(kw)
    graph = nx.Graph()
    for k in kw:
        first_level = suggests.suggests.get_suggests(k, source='google')
        for sg in first_level['suggests']:
            graph.add_edge(k, sg, weight=1)
            print(sg)
            # Pause between consecutive suggest requests (presumably to avoid
            # being throttled by the upstream service).
            time.sleep(1)
            # Expand the suggestion itself. The previous code queried ``k``
            # again here, which only re-fetched the first-level suggestions
            # and linked them to each other instead of adding a second level.
            second_level = suggests.suggests.get_suggests(sg, source='google')
            for elmt in second_level['suggests']:
                graph.add_edge(sg, elmt, weight=1)
    # Materialise the self-loop list first: the graph must not be mutated
    # while the selfloop_edges generator is still being consumed.
    graph.remove_edges_from(list(nx.selfloop_edges(graph)))
    pyG = Network(height="750px", width="100%", bgcolor="#333333", font_color="white")
    pyG.from_nx(graph)
    graph_id = id_generator()
    # Write the file before recording it so that a listing built from the
    # table never links to a page that does not exist yet.
    pyG.save_graph('static/gs/' + str(graph_id) + '.html')
    db['gen_graph'].insert({'filename': str(graph_id), 'kw': str(kw)})
@app.get("/tree_list/",response_class=HTMLResponse)
async def tree_list():
    """Render an HTML table of the most recently generated suggestion graphs.

    Reads ``filename`` and ``kw`` from the ``gen_graph`` table, newest first,
    and emits one table row per record with a link to
    ``/web/gs/<filename>.html``. Iteration stops once the counter exceeds 10,
    i.e. after at most 11 rows.

    Returns:
        The complete HTML document as a string.
    """
    # Imported locally under a distinct name so nothing in this function
    # depends on (or shadows) a module-level ``html`` name.
    from html import escape

    # NOTE(review): a new connection is opened on every request and is never
    # explicitly closed -- consider reusing a shared connection.
    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/gtrends?charset=utf8mb4')
    parts = [
        "<html><body><h2>清單</h2></br>請一分鐘後refresh </br></br>",
        "<table border='1'>",
    ]
    cursor = db.query('select filename,kw from gen_graph order by id desc')
    cnt = 0
    for c in cursor:
        # ``kw`` originates from user-submitted form data, so it must be
        # escaped before being embedded in markup. html.escape also escapes
        # single quotes, which matters inside the single-quoted href.
        kw_text = escape(str(c['kw']))
        filename = escape(str(c['filename']))
        parts.append("<tr><td>" + kw_text + "</td>")
        parts.append("<td><a href='/web/gs/" + filename + ".html'>" + filename + "</a></td></tr>")
        cnt += 1
        if cnt > 10:
            break
    parts.append("</table></body></html>")
    return "".join(parts)
@app.post("/proj_kw/",response_class=HTMLResponse)
async def proj_kw(proj: str = Form(...),kws:Optional[str] = Form(None)):
    """Queue SERP jobs for the keywords submitted for a project.

    Args:
        proj: Project name stored with every queued keyword.
        kws: Keyword text from the form; may be omitted.

    Returns:
        A short acknowledgement string.
    """
    # ``kws`` arrives as a single string (or None). Iterating it directly
    # raised TypeError for None and inserted one row per *character*
    # otherwise.
    # NOTE(review): assumes one keyword per line -- confirm against the form
    # that posts to this endpoint.
    keywords = [line.strip() for line in (kws or '').splitlines()]
    keywords = [k for k in keywords if k]
    if keywords:
        db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
        table = db['serp_jobs']
        for kw in keywords:
            table.insert({'proj': proj, 'kw': kw})
    return "OK請稍後"
- #response_class=RedirectResponse
@app.post("/gen_tree/",response_class=HTMLResponse)
async def func_expand(kw: str = Form(...),kw2:Optional[str] = Form(None),kw3:Optional[str] = Form(None),kw4:Optional[str] = Form(None) ):
    """Start background generation of a suggestion graph and redirect.

    Args:
        kw: Required keyword; used only when longer than one character.
        kw2: Optional extra keyword, used when provided.
        kw3: Optional extra keyword, used when provided.
        kw4: Optional extra keyword, used when provided.

    Returns:
        A 302 redirect to ``/tree_list``.
    """
    # The primary keyword is skipped when it is a single character or empty.
    kwlst = [kw] if len(kw) > 1 else []
    kwlst.extend(extra for extra in (kw2, kw3, kw4) if extra is not None)
    # Graph generation runs in its own thread so the request returns at once.
    worker = threading.Thread(target=thread_function, args=(kwlst,))
    worker.start()
    return RedirectResponse(url="/tree_list", status_code=302)
def restart_browser():
    """Quit any running browser and start a fresh headless Chrome.

    The new WebDriver is stored in the module-level ``driver`` and also
    returned.

    Returns:
        The new WebDriver, or ``None`` when Chrome could not be started.
    """
    global driver
    if driver is not None:
        print('closing')
        try:
            driver.quit()
        except Exception:
            # A stale or already-closed session must not stop us from
            # starting a replacement browser.
            traceback.print_exc()
    driver = None
    try:
        options = webdriver.ChromeOptions()
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")
        options.add_argument('--headless')
        options.add_argument("--incognito")
        driver = webdriver.Chrome(options=options)
    except Exception:
        # Previously a failed launch returned None without any trace.
        print('開啟失敗')
        traceback.print_exc()
        driver = None
        return None
    try:
        str1 = driver.capabilities['chrome']['chromedriverVersion'].split(' ')[0]
        print('這裡', str1)
    except Exception:
        # The version string is only printed for diagnostics. Failing to read
        # it used to return None while leaving a live browser in ``driver``;
        # now the working browser is still handed back.
        traceback.print_exc()
    return driver
-
@app.post("/ranking/")
async def ranking(kw: str = Form(...), domain:str = Form(...),kw2:Optional[str] = Form(None),domain2:Optional[str] = Form(None),kw3:Optional[str] = Form(None),domain3:Optional[str] = Form(None),kw4:Optional[str] = Form(None),domain4:Optional[str] = Form(None),kw5:Optional[str] = Form(None),domain5:Optional[str] = Form(None)):
    """Look up where each domain appears in Google results for its keyword.

    For every submitted keyword/domain pair a fresh browser is started, the
    keyword is typed into Google, and every result link whose URL contains
    the domain is recorded together with its position on the page.

    Args:
        kw, domain: Required keyword/domain pair.
        kw2..kw5, domain2..domain5: Optional extra pairs; a pair is processed
            when its keyword is provided.

    Returns:
        A string wrapping the list of per-keyword result dicts in a minimal
        HTML document.

    Raises:
        RuntimeError: If the browser cannot be started.
    """
    # NOTE(review): the Selenium calls and time.sleep() below block the event
    # loop for the whole duration of the request.
    kwlst = [[kw, domain]]
    optional_pairs = ((kw2, domain2), (kw3, domain3), (kw4, domain4), (kw5, domain5))
    for extra_kw, extra_domain in optional_pairs:
        if extra_kw is not None:
            kwlst.append([extra_kw, extra_domain])
    result = []
    for i in kwlst:
        driver = restart_browser()
        if driver is None:
            # restart_browser() returns None on failure; fail with a clear
            # message instead of an AttributeError on ``None.get``.
            raise RuntimeError('could not start the browser for keyword %r' % (i[0],))
        datadict = {'搜尋詞': [], '結果標題': [], '結果網址': [], '結果名次': []}
        domain_name = i[1]
        try:
            googleurl = 'https://www.google.com/?num=100'
            driver.get(googleurl)
            time.sleep(6)
            send_kw_elmt = driver.find_element(By.XPATH,
                                               '/html/body/div[1]/div[3]/form/div[1]/div[1]/div[1]/div/div[2]/input')
            send_kw_elmt.send_keys(i[0])
            time.sleep(3)
            send_kw_elmt.send_keys(Keys.ENTER)
            time.sleep(6)
            # find_elements_by_xpath was removed in Selenium 4.3; use the
            # By-based locator API, as the find_element call above does.
            elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
            # The rank is the 1-based position of the link in the result list.
            for cnt, elmt in enumerate(elmts, start=1):
                try:
                    href = elmt.get_attribute('href')
                    # An optional domain may be missing and an anchor may
                    # have no href; neither can match.
                    if domain_name is not None and href is not None and domain_name in href:
                        datadict['搜尋詞'].append(i[0])
                        datadict['結果標題'].append(elmt.text)
                        datadict['結果網址'].append(href)
                        datadict['結果名次'].append(str(cnt))
                except Exception:
                    print('href2 exception')
                    traceback.print_exc()
        finally:
            # Always release the browser, even when the page interaction fails.
            driver.quit()
        result.append(datadict)
        print(domain_name)
        print(datadict)
        print('數量', len(elmts))
        # Long pause between searches (presumably to avoid being rate limited).
        time.sleep(90)
    html = f"<html><body>{result}</body></html>"
    return html
@app.get("/ranking_result/")
async def tree_list():
    """Return the opening tag of a bordered HTML table.

    Placeholder handler for ``/ranking_result/``: no rows are produced and
    the table element is not closed.
    """
    # NOTE(review): a handler for /tree_list/ earlier in this file is also
    # named ``tree_list``; this definition rebinds that module-level name.
    return "<table border='1'>"
# Token -> occurrence count; proc_row() increments these counters.
kwdict={}
# Module-level undirected graph that kwtree() populates with edges.
G = nx.Graph()
def gcm0(strings):
    """Greedily group similar strings into clusters.

    Each input is stripped of surrounding whitespace and compared against the
    keys of the clusters created so far using ``difflib.get_close_matches``
    with a cutoff of 0.65. It joins the best-matching cluster, or starts a
    new cluster keyed by itself when nothing is close enough.

    Args:
        strings: Iterable of strings to cluster.

    Returns:
        Dict mapping each cluster key to the list of its members, in input
        order (the key is always the first member).
    """
    groups = {}
    for raw in strings:
        candidate = raw.strip()
        nearest = difflib.get_close_matches(candidate, groups.keys(), 8, 0.65)
        if not nearest:
            groups[candidate] = [candidate]
            continue
        # Matches come back best-first; only the top one is used.
        groups[nearest[0]].append(candidate)
    return groups
def proc_row(row):
    """Count the space-separated tokens of ``row`` in the module-level ``kwdict``.

    Prints the raw row and its token list, then increments the counter of
    every token, creating the counter at 1 when the token is new.

    Args:
        row: String to split on single spaces.
    """
    print('這裡', row)
    tokens = row.split(' ')
    print(tokens)
    for token in tokens:
        seen_so_far = kwdict.get(token)
        kwdict[token] = 1 if seen_so_far is None else seen_so_far + 1
def save_upload_file_tmp(file: UploadFile) -> Path:
    """Copy an uploaded file into a named temporary file and return its path.

    The temporary file keeps the suffix of the uploaded filename and is not
    deleted when closed. The upload's underlying file object is closed in
    every case.

    Args:
        file: The upload whose content is copied.

    Returns:
        Path of the temporary file that was written.
    """
    try:
        extension = Path(file.filename).suffix
        with NamedTemporaryFile(delete=False, suffix=extension) as spool:
            shutil.copyfileobj(file.file, spool)
            saved_to = Path(spool.name)
        return saved_to
    finally:
        file.file.close()
-
@app.post("/kwtree")
async def kwtree(file: UploadFile = File(...)):
    """Cluster the keywords of an uploaded CSV and return them as a graph page.

    The first CSV row is treated as a header and skipped. The first column of
    every remaining row is counted via ``proc_row`` and, once de-duplicated,
    clustered with ``gcm0``. Every cluster key is linked to its members, and
    cluster keys whose 2-gram distance is at most 12 are linked pairwise
    (each key takes part in at most one such link). The graph is rendered
    with pyvis and the generated HTML file is returned.

    Args:
        file: Uploaded UTF-8 CSV file.

    Returns:
        A ``FileResponse`` serving the generated HTML page.
    """
    from strsimpy.qgram import QGram

    csvfile = csv.reader(codecs.iterdecode(file.file, 'utf-8'), dialect=csv.excel)
    next(csvfile, None)  # skip the header row
    rowlst = []
    seen = set()
    for row in csvfile:
        if not row:
            # Blank CSV lines parse to an empty list; row[0] would raise.
            continue
        proc_row(row[0])
        # The old check compared the whole ``row`` list against the stored
        # strings, so it never detected a duplicate.
        if row[0] not in seen:
            seen.add(row[0])
            rowlst.append(row[0])
    clusters = gcm0(rowlst)
    # The module-level graph is reused between requests; reset it so one
    # upload's nodes do not leak into the next page.
    G.clear()
    keys = []
    for k, v in clusters.items():
        keys.append(k)
        for x in v:
            G.add_edge(k, x, weight=1, label='')
    linked = set()
    qgram = QGram(2)
    for k1 in keys:
        for k2 in keys:
            if k1 == k2 or k1 in linked or k2 in linked:
                continue
            if qgram.distance(k1, k2) <= 12:
                linked.add(k1)
                linked.add(k2)
                G.add_edge(k1, k2, weight=1, label='')
    pyG = Network(height="100%", width="100%", bgcolor="#444444", font_color="white")
    pyG.set_options("""
const options = {
  "nodes" : {
    "font" : {
      "size" : "30",
      "color" : "#ffffff"
    }
  },
  "physics": {
    "forceAtlas2Based": {
      "springLength": 100
    },
    "maxVelocity": 150,
    "minVelocity": 0.28,
    "solver": "forceAtlas2Based"
  }
}
""")
    # Every cluster key is also its own first member, so self-loops always
    # exist. Materialise them first: the graph must not change while the
    # selfloop_edges generator is being consumed.
    G.remove_edges_from(list(nx.selfloop_edges(G)))
    pyG.from_nx(G)
    # NOTE(review): only 101 possible file names, so concurrent or repeated
    # requests can overwrite each other's page.
    news_file = random.randint(0, 100)
    out_name = f'news{news_file}.html'
    # save_graph only writes the file; show() additionally tries to open it
    # in a browser, which is unwanted in a server process.
    pyG.save_graph(out_name)
    # Serve the file from where it was actually written instead of a
    # hard-coded absolute directory that only matched one machine.
    return FileResponse(str(Path(out_name).resolve()), media_type='text/html')
|