# main.py
  1. from pydoc import HTMLDoc
  2. from fastapi import FastAPI
  3. import dataset
  4. import sys
  5. import os
  6. import time
  7. from fastapi.middleware.cors import CORSMiddleware
  8. from fastapi.staticfiles import StaticFiles
  9. from pydantic import BaseModel
  10. from fastapi import FastAPI, Form, Response
  11. import subprocess
  12. import suggests
  13. from typing import Optional
  14. import networkx as nx
  15. import pyvis
  16. import time
  17. from pyvis.network import Network
  18. import pickle
  19. import logging
  20. import threading
  21. import random
  22. import string
  23. from fastapi.responses import HTMLResponse
  24. from fastapi.responses import RedirectResponse
  25. import dataset
  26. from selenium import webdriver
  27. import traceback
  28. import time
  29. from selenium.webdriver.common.keys import Keys
  30. from selenium.webdriver.common.by import By
  31. # import pymysql
  32. # pymysql.install_as_MySQLdb()
# Module-level Selenium WebDriver handle. It starts unset; ranking() declares
# `global driver` and rebinds it to a fresh Chrome instance on every call.
driver = None
  34. def id_generator(size=6, chars=string.ascii_uppercase + string.digits):
  35. return ''.join(random.choice(chars) for _ in range(size))
  36. app = FastAPI()
  37. origins = ["*"]
  38. app.add_middleware(
  39. CORSMiddleware,
  40. allow_origins=origins,
  41. allow_credentials=True,
  42. allow_methods=["*"],
  43. allow_headers=["*"],
  44. )
  45. db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/gtrends?charset=utf8mb4')
  46. # app.mount("/web", StaticFiles(directory="static"), name="static")
  47. app.mount("/web", StaticFiles(directory="/Users/zooeytsai/kw_tools/web/static"), name="static")
  48. def thread_function(kw):
  49. global db
  50. print(kw)
  51. G = nx.Graph()
  52. for k in kw:
  53. s = suggests.suggests.get_suggests(k, source='google')
  54. for sg in s['suggests']:
  55. G.add_edge(k,sg,weight=1)
  56. print(sg)
  57. time.sleep(1)
  58. s2 = suggests.suggests.get_suggests(k, source='google')
  59. for elmt in s2['suggests']:
  60. G.add_edge(sg,elmt,weight=1)
  61. # G.remove_nodes_from(list(nx.isolates(G)))
  62. G.remove_edges_from( list(nx.selfloop_edges(G)))
  63. # pickle.dump( G, open( "gs2.p", "wb" ) )
  64. pyG = Network(height="750px", width="100%",bgcolor="#333333",font_color="white")
  65. pyG.from_nx(G)
  66. id=id_generator()
  67. db['gen_graph'].insert({'filename':str(id),'kw':str(kw)})
  68. # pyG.save_graph('gstest')
  69. # pyG.show('static/gs/'+str(id)+'.html')
  70. pyG.save_graph('static/gs/'+str(id)+'.html')
  71. @app.get("/tree_list/",response_class=HTMLResponse)
  72. async def tree_list():
  73. # global db
  74. db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/gtrends?charset=utf8mb4')
  75. html="<html><body><h2>清單</h2></br>請一分鐘後refresh </br></br>"
  76. html+="<table border='1'>"
  77. cursor=db.query('select filename,kw from gen_graph order by id desc')
  78. cnt=0
  79. for c in cursor:
  80. html+="<tr><td>"+c['kw']+"</td>"
  81. html+="<td><a href='/web/gs/"+c['filename']+".html'>"+c['filename']+"</a></td></tr>"
  82. cnt+=1
  83. if cnt > 10:
  84. break
  85. html+="</table></body></html>"
  86. return html
  87. @app.post("/proj_kw/",response_class=HTMLResponse)
  88. async def proj_kw(proj: str = Form(...),kws:Optional[str] = Form(None)):
  89. db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
  90. table=db['serp_jobs']
  91. for kw in kws:
  92. table.insert({'proj':proj,'kw':kw})
  93. return "OK請稍後"
  94. #response_class=RedirectResponse
  95. @app.post("/gen_tree/",response_class=HTMLResponse)
  96. async def func_expand(kw: str = Form(...),kw2:Optional[str] = Form(None),kw3:Optional[str] = Form(None),kw4:Optional[str] = Form(None) ):
  97. kwlst=[]
  98. if len(kw)>1:
  99. kwlst.append(kw)
  100. if kw2 is not None:
  101. kwlst.append(kw2)
  102. if kw3 is not None:
  103. kwlst.append(kw3)
  104. if kw4 is not None:
  105. kwlst.append(kw4)
  106. x = threading.Thread(target=thread_function, args=(kwlst,))
  107. x.start()
  108. # return "ok"
  109. return RedirectResponse(url="/tree_list",status_code=302)
  110. # return HTMLResponse('<html><head><meta http-equiv="refresh" content="0; URL="/tree_list" /></head></html>')
  111. @app.post("/ranking/")
  112. async def ranking(kw: str = Form(...), domain:str = Form(...)):
  113. global driver
  114. options = webdriver.ChromeOptions()
  115. options.add_argument("--no-sandbox")
  116. options.add_argument("--disable-dev-shm-usage")
  117. options.add_argument('--headless')
  118. driver = webdriver.Chrome(options=options)
  119. driver.set_window_size(950, 20000)
  120. # db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
  121. db_local = dataset.connect('mysql://root:jondae350@localhost/ig_tags')
  122. table = db_local['google_rank']
  123. # escaped_search_term=urllib.parse.quote(term)
  124. googleurl = 'https://www.google.com/?num=100'
  125. driver.get(googleurl)
  126. time.sleep(6)
  127. send_kw_elmt = driver.find_element(By.XPATH,
  128. '/html/body/div[1]/div[3]/form/div[1]/div[1]/div[1]/div/div[2]/input')
  129. send_kw_elmt.send_keys(kw)
  130. time.sleep(3)
  131. send_kw_elmt.send_keys(Keys.ENTER)
  132. time.sleep(6)
  133. elmts = driver.find_elements_by_xpath("//div[@class='yuRUbf']/a")
  134. cnt = 1
  135. datadict = {'搜尋詞': [], '結果標題': [], '結果網址': [], '結果名次': []}
  136. for elmt in elmts:
  137. try:
  138. href = elmt.get_attribute('href')
  139. if domain in href:
  140. datadict['搜尋詞'].append(kw)
  141. datadict['結果標題'].append(elmt.text)
  142. datadict['結果網址'].append(href)
  143. datadict['結果名次'].append(str(cnt))
  144. cnt += 1
  145. except:
  146. print('href2 exception')
  147. traceback.print_exc()
  148. driver.quit()
  149. time.sleep(60)
  150. # db.close()
  151. db_local.close()
  152. # return "ok"
  153. # return RedirectResponse(url="/ranking_result",)
  154. html = "<html><body><h2>"+str(datadict)+"</br></br>"
  155. return html
  156. @app.get("/ranking_result/")
  157. async def tree_list():
  158. html = "<table border='1'>"
  159. # html += "<tr><td>" + c['kw'] + "</td>"
  160. return html