| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178 | 
							- from instaloader import Instaloader, Profile
 
- import traceback
 
- import copy
 
- import operator
 
- import dataset
 
- import pandas as pd
 
- import networkx as nx
 
- #import pysftp
 
- import codecs
 
- import pyvis
 
- import sys
 
- import pickle
 
- import os
 
- import searchconsole
 
- from pyvis.network import Network
 
- import jieba
 
# Project name.  It determines the SQLite file that is opened
# ("<pname>.db") and is reused by gen_pic() for the generated HTML file
# name.  Names that were used here before: cont, damanwoo, drama, news,
# www, ipromise, sports, rumor, korea.
pname = 'hhh.rb'

# Connection to the per-project SQLite database and a handle on its
# "tmp" table.
db = dataset.connect('sqlite:///{}.db'.format(pname))
table = db['tmp']

# NOTE(review): not referenced by any function visible in this file --
# presumably a row-id counter used elsewhere; confirm before removing.
rid = 0
 
def get_css():
    """Return the contents of ``jared/data/css.txt`` as one string.

    The file is read as UTF-8 (path relative to the current working
    directory).  Each line keeps its own line terminator and consecutive
    lines are additionally separated by a single space, exactly as
    ``' '.join(lines)`` produces.

    Returns:
        str: the joined file contents ('' for an empty file).

    Raises:
        OSError: if the file cannot be opened or read.
    """
    # The context manager guarantees the handle is released even when
    # reading or decoding fails part-way through.
    with codecs.open('jared/data/css.txt', 'r', 'utf-8') as css_file:
        return ' '.join(css_file.readlines())
 
def modify_file(fname):
    """Write a patched copy of the HTML file *fname* named ``mod_<name>``.

    The copy contains every input line unchanged, plus one extra snippet
    after the lines that match (only the first matching rule applies to
    a line):

    * ``<body>`` within the first 10 characters: an opening
      ``<div id="google">`` is inserted after the line;
    * ``<style type="text`` within the first 22 characters: the CSS
      payload, wrapped in newlines, is inserted after the line;
    * ``<div id = "mynetwork"`` within the first 30 characters: a closing
      ``</div>`` is inserted after the line.

    The result is written in the same directory as *fname*, with
    ``mod_`` prepended to the file name only (so ``out/a.html`` becomes
    ``out/mod_a.html``).  For a bare file name this is ``"mod_" + fname``.

    Args:
        fname: path of the UTF-8 encoded HTML file to read.

    Raises:
        OSError: if the input cannot be read or the output cannot be
            written.
    """
    # The CSS payload is intentionally empty, so only two newlines follow
    # the <style> line; assign get_css() here to inline the stylesheet.
    css = ''

    with codecs.open(fname, 'r', 'utf-8') as source:
        lines = source.readlines()

    # Collect the pieces and join once instead of growing a string.
    pieces = []
    for line in lines:
        pieces.append(line)
        if '<body>' in line[0:10]:
            pieces.append('\n<div id="google">\n')
        elif '<style type="text' in line[0:22]:
            pieces.append("\n" + css + "\n")
        elif '<div id = "mynetwork"' in line[0:30]:
            pieces.append('\n</div>\n')

    # Prefix the base name, not the whole path, so inputs that live in
    # another directory get their output next to them.
    directory, basename = os.path.split(fname)
    target = os.path.join(directory, "mod_" + basename)
    with codecs.open(target, 'w', 'utf-8') as sink:
        sink.write(''.join(pieces))
 
def gen_pic():
    """Render the URL/query graph of the ``tmp`` table as an HTML page.

    Steps:

    1. Select the distinct ``(q, url)`` pairs of every URL that has more
       than 4 and fewer than 9 rows whose ``q`` is longer than two
       characters.
    2. Remove the site host prefixes from each URL and group the queries
       by the remaining path.
    3. For every path with at least two queries, add one edge
       ``path -- query`` per query.
    4. Prune the graph: drop nodes with degree above 15, then
       self-loops, then nodes left without any edge.
    5. Render the graph with pyvis into ``pname + '.html'`` and
       post-process that file with ``modify_file``.

    The number of distinct paths, and a running edge count every 1000
    edges, are printed as progress output.

    Returns:
        list: always an empty list; nothing is collected into it.
    """
    # NOTE: a commented-out SFTP upload step was dropped from this
    # function because it embedded a plaintext password.  If uploading
    # is reinstated, load the credentials from configuration instead.
    graph = nx.Graph()

    cursor = db.query('select distinct q,url from tmp where url in (select distinct url from  (select url,count(q) from tmp where length(q)> 2 group by url   having count(q) >4 and count(q)<9 ) as tbl1) order by url')

    # Host variants stripped from the URLs, applied in this order.
    site_prefixes = (
        'https://www.hhh.com.tw',
        'https://hhh.com.tw',
        'https://m.hhh.com.tw',
    )

    queries_by_path = {}
    for row in cursor:
        path = row['url']
        for prefix in site_prefixes:
            path = path.replace(prefix, '')
        queries_by_path.setdefault(path, []).append(row['q'])
    print(len(queries_by_path))

    edge_count = 0
    for path, queries in queries_by_path.items():
        if len(queries) < 2:
            continue
        for query in queries:
            graph.add_edge(path, query, weight=3, width=3, borderwidth=3)
            edge_count += 1
            if edge_count % 1000 == 0:
                print(edge_count)

    # Nodes connected to more than 15 others are removed from the picture.
    crowded = [node for node in graph if graph.degree(node) > 15]
    graph.remove_nodes_from(crowded)

    # Snapshot the generators before mutating the graph they iterate over.
    graph.remove_edges_from(list(nx.selfloop_edges(graph)))
    graph.remove_nodes_from(list(nx.isolates(graph)))

    net = Network(height="600px", width="100%", bgcolor="#444444", font_color="white")
    net.from_nx(graph)

    html_name = pname + '.html'
    net.show(html_name)
    modify_file(html_name)

    return []
 
# Script entry point: the graph is built and the HTML files are written
# as soon as this module is executed.
gen_pic()

# Disabled example call; no `checkig` function is defined in the visible
# part of this file (the original argument was the Chinese term for
# "credit card"):
#     r = checkig('credit card')
#     print(r)

# JavaScript click handler kept for reference.  It copies the label of
# the clicked node to the clipboard.
# NOTE(review): presumably meant to be pasted into the generated HTML
# page -- confirm.
#
#     network.on('click', function (properties) {
#         var ids = properties.nodes;
#         var clickedNodes = nodes.get(ids);
#         var copyText = clickedNodes[0].label;
#         var promise = navigator.clipboard.writeText(copyText);
#         // console.log('clicked nodes:', clickedNodes);
#     });
 
 
  |