|
@@ -7,7 +7,7 @@ import time
|
|
|
from fastapi.middleware.cors import CORSMiddleware
|
|
|
from fastapi.staticfiles import StaticFiles
|
|
|
from pydantic import BaseModel
|
|
|
-from fastapi import FastAPI, Form, Response
|
|
|
+from fastapi import FastAPI, Form, Response, File, UploadFile, Request
|
|
|
import subprocess
|
|
|
import suggests
|
|
|
from typing import Optional
|
|
@@ -20,17 +20,27 @@ import logging
|
|
|
import threading
|
|
|
import random
|
|
|
import string
|
|
|
-from fastapi.responses import HTMLResponse
|
|
|
-from fastapi.responses import RedirectResponse
|
|
|
+from fastapi.responses import HTMLResponse,RedirectResponse, FileResponse
|
|
|
import dataset
|
|
|
-from selenium import webdriver
|
|
|
import traceback
|
|
|
import time
|
|
|
+from selenium import webdriver
|
|
|
from selenium.webdriver.common.keys import Keys
|
|
|
from selenium.webdriver.common.by import By
|
|
|
from selenium.webdriver.chrome.service import Service
|
|
|
-# import pymysql
|
|
|
-# pymysql.install_as_MySQLdb()
|
|
|
+import networkx as nx
|
|
|
+from pyvis.network import Network
|
|
|
+import csv
|
|
|
+import sys
|
|
|
+import codecs
|
|
|
+import difflib
|
|
|
+import pymysql
|
|
|
+pymysql.install_as_MySQLdb()
|
|
|
+from pathlib import Path
|
|
|
+from tempfile import NamedTemporaryFile
|
|
|
+from typing import Callable
|
|
|
+import shutil
|
|
|
+import aiofiles
|
|
|
|
|
|
driver = None
|
|
|
|
|
@@ -50,8 +60,8 @@ app.add_middleware(
|
|
|
|
|
|
db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/gtrends?charset=utf8mb4')
|
|
|
|
|
|
-# app.mount("/web", StaticFiles(directory="static"), name="static")
|
|
|
-app.mount("/web", StaticFiles(directory="/root/src/kw_tools/web/static"), name="static")
|
|
|
+app.mount("/web", StaticFiles(directory="/Users/zooeytsai/kw_tools/web/static"), name="static")
|
|
|
+# app.mount("/web", StaticFiles(directory="/root/src/kw_tools/web/static"), name="static")
|
|
|
|
|
|
def thread_function(kw):
|
|
|
global db
|
|
@@ -217,3 +227,111 @@ async def tree_list():
|
|
|
# html += "<tr><td>" + c['kw'] + "</td>"
|
|
|
|
|
|
return html
|
|
|
+
|
|
|
+kwdict={}
|
|
|
+G = nx.Graph()
|
|
|
def gcm0(strings):
    """Greedily group similar strings into clusters.

    Each input string is stripped of surrounding whitespace and compared, in
    input order, against the keys of the clusters created so far using
    ``difflib.get_close_matches`` with a similarity cutoff of 0.65.  The
    string joins the cluster of its closest key, or founds a new cluster
    keyed by itself when no existing key is close enough.

    Args:
        strings: Iterable of strings to cluster.

    Returns:
        A dict mapping each cluster's founding string to the list of its
        members in input order.  The founder is always the first member, and
        duplicate inputs appear once per occurrence.
    """
    clusters = {}
    for text in (item.strip() for item in strings):
        # Only the best-scoring key is used, so request a single match
        # instead of ranking the top eight candidates.
        closest = difflib.get_close_matches(text, clusters.keys(), n=1, cutoff=0.65)
        if closest:
            clusters[closest[0]].append(text)
        else:
            clusters[text] = [text]
    return clusters
|
|
|
+
|
|
|
+
|
|
|
def proc_row(row):
    """Tally the tokens of *row* in the module-level ``kwdict``.

    The row is split on single space characters, so consecutive spaces
    produce empty-string tokens that are counted like any other token.

    Args:
        row: The string to tokenize.
    """
    for token in row.split(' '):
        kwdict[token] = kwdict.get(token, 0) + 1
|
|
|
+
|
|
|
+
|
|
|
def save_upload_file_tmp(file: UploadFile) -> Path:
    """Copy an uploaded file to a named temporary file and return its path.

    The temporary file keeps the suffix of the client-supplied filename (or
    has no suffix when the client sent no filename) and is NOT deleted
    automatically; the caller is responsible for removing it.

    Args:
        file: The upload whose ``file`` stream is copied.  The stream is
            always closed before this function returns or raises.

    Returns:
        Path of the temporary file holding the uploaded bytes.

    Raises:
        Any exception raised while creating or writing the temporary file;
        a partially written temporary file is removed before re-raising.
    """
    tmp_path = None
    try:
        # ``filename`` may be None when the client omits it; Path(None) would
        # raise TypeError, so fall back to an empty suffix.
        suffix = Path(file.filename or "").suffix
        with NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            tmp_path = Path(tmp.name)
            shutil.copyfileobj(file.file, tmp)
    except Exception:
        # delete=False means nothing else would clean up a half-written file.
        if tmp_path is not None and tmp_path.exists():
            tmp_path.unlink()
        raise
    finally:
        file.file.close()
    return tmp_path
|
|
|
+
|
|
|
+
|
|
|
def _read_unique_keywords(path):
    """Return the distinct first-column values of the TSV at *path*, in file order.

    The file is decoded as UTF-16 and parsed as tab-delimited text with ``|``
    as the quote character.  The first row is treated as a header and
    skipped, and blank rows are ignored.  Every data row's first column is
    also passed to ``proc_row`` (including repeats).
    """
    keywords = []
    seen = set()
    # newline='' lets the csv module handle line endings itself.
    with open(path, encoding='utf-16', newline='') as handle:
        reader = csv.reader(handle, delimiter='\t', quotechar='|')
        next(reader, None)  # skip the header row
        for row in reader:
            if not row:
                continue
            keyword = row[0]
            proc_row(keyword)
            if keyword not in seen:
                seen.add(keyword)
                keywords.append(keyword)
    return keywords


def _build_keyword_graph(keywords):
    """Cluster *keywords* with ``gcm0`` and return the resulting graph.

    Every cluster founder is linked to each of its members.  Founders whose
    2-gram distance is at most 12 are then linked pairwise, with each founder
    taking part in at most one such link.
    """
    from itertools import combinations
    from strsimpy.qgram import QGram

    # A fresh graph per call keeps one upload's nodes out of the next one.
    graph = nx.Graph()
    clusters = gcm0(keywords)
    for founder, members in clusters.items():
        for member in members:
            graph.add_edge(founder, member, weight=1, label='')

    qgram = QGram(2)
    linked = set()
    # The distance is symmetric, so each unordered pair is visited once.
    for first, second in combinations(clusters, 2):
        if first in linked or second in linked:
            continue
        if qgram.distance(first, second) <= 12:
            linked.update((first, second))
            graph.add_edge(first, second, weight=1, label='')

    # Founders are members of their own cluster, which creates self-loops.
    # Materialize the list first so the graph is not mutated mid-iteration.
    graph.remove_edges_from(list(nx.selfloop_edges(graph)))
    return graph


@app.post("/kwtree")
async def kwtree(file: UploadFile = File(...)):
    """Render an interactive keyword-cluster graph from an uploaded TSV file.

    The upload must be UTF-16 encoded, tab-delimited text with a header row;
    the first column of each row is used as the keyword.  The uploaded bytes
    are spooled to a temporary file, which is deleted once parsed.

    Returns:
        A ``FileResponse`` serving the generated HTML page.  The page is
        written to the current working directory under a unique
        ``news*.html`` name and is not removed afterwards.
    """
    # Read what the client actually sent; never build a filesystem path from
    # the client-controlled filename.
    tmp_path = save_upload_file_tmp(file)
    try:
        keywords = _read_unique_keywords(tmp_path)
    finally:
        tmp_path.unlink()

    graph = _build_keyword_graph(keywords)

    net = Network(height="100%", width="100%", bgcolor="#444444", font_color="white")
    net.set_options("""
    const options = {
    "nodes" : {
        "font" : {
            "size" : "30",
            "color" : "#ffffff"
        }
    },
    "physics": {
        "forceAtlas2Based": {
            "springLength": 100
        },
        "maxVelocity": 150,
        "minVelocity": 0.28,
        "solver": "forceAtlas2Based"
    }
    }
    """)
    net.from_nx(graph)

    # Reserve a unique name so overlapping requests cannot overwrite a page
    # that is still being streamed to another client.
    with NamedTemporaryFile(prefix='news', suffix='.html', dir='.', delete=False) as reserved:
        html_name = Path(reserved.name).name
    # save_graph only writes the file; show() may also try to open a browser.
    net.save_graph(html_name)

    # Serve the file that was just written rather than a hard-coded path.
    return FileResponse(str(Path(html_name).resolve()), media_type='text/html')
|