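"""Keyword-graph builder: pull (page, query) rows from Google Search
Console, split the queries into tokens stored in a local SQLite table
(via dataset), then render a token-to-page-group graph as interactive
HTML with networkx + pyvis."""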
#from instaloader import Instaloader, Profile
import traceback
import copy
import operator
import dataset
import pandas as pd
import networkx as nx
#import pysftp
import codecs
import pyvis
import sys
import pickle
import os
import searchconsole
from pyvis.network import Network
import jieba
 
# Project name: reused for the SQLite file, the pickle file, and the output HTML.
pname = 'hhh.rb'
db = dataset.connect('sqlite:///' + pname + '.db')
 
def destroy_db():
    """Drop the tmp scratch table so a fresh run starts clean."""
    global db
    try:
        db.query('drop table tmp')
    except Exception:
        traceback.print_exc()

table = db['tmp']
 
#pname='cont'
#pname='damanwoo'
#pname='drama'
#pname='news'
#pname='www'
#pname='ipromise'
#pname='sports'
#pname='rumor'
#pname='korea'

rid = 0  # running group id for the rows inserted by checkig()
 
def get_css():
    """Read the shared CSS snippet that modify_file() can inject."""
    with codecs.open('jared/data/css.txt', 'r', 'utf-8') as fr:
        lines = fr.readlines()
    return ' '.join(lines)
 
def modify_file(fname):
    """Post-process a generated HTML file: wrap the network div, and
    optionally inject extra CSS; writes the result to mod_<fname>."""
    with codecs.open(fname, 'r', 'utf-8') as fr:
        lines = fr.readlines()
#    css=get_css()
    css = ''
    content_output = ''
    for l in lines:
        if '<body>' in l[0:10]:
            content_output += l
            content_output += '\n<div id="google">\n'
            continue
        if '<style type="text' in l[0:22]:
            content_output += l
            content_output += '\n' + css + '\n'
            continue
        if '<div id = "mynetwork"' in l[0:30]:
            content_output += l
            content_output += '\n</div>\n'
            continue
        content_output += l
    with codecs.open('mod_' + fname, 'w', 'utf-8') as fw:
        fw.write(content_output)
 
def checkig(pgnum):
    """Pull Search Console (page, query) rows for one date range, split
    each query into whitespace tokens, and insert one row per token into
    the tmp table, with a running rid per page URL."""
    global instl  # unused here; leftover from the commented instaloader variant
    global table
    global pname
    global rid
    lst = []
    cntdict = {}
    codelist = {}
    idx = 0
    flag_break = False

    # Build a keys/ path next to this script (used by the commented-out
    # authenticate() variants below).
    fname = os.path.abspath(__file__)
    elmts = fname.split(os.path.sep)
    path2 = os.path.sep.join(elmts[0:-1])
    keysdir = path2 + os.path.sep + 'keys' + os.path.sep

#    account = searchconsole.authenticate(client_config='c:/keys/client_secret.json',credentials='c:/keys/credentials.json')
#    account = searchconsole.authenticate(client_config='c:/keys/client_secret_162277274609-v1fsq5iscscl7e2ta4a8tc0og5tehl44.apps.googleusercontent.com.json',serialize='out.json')
    account = searchconsole.authenticate(client_config='c:/keys/client_secret_162277274609-v1fsq5iscscl7e2ta4a8tc0og5tehl44.apps.googleusercontent.com.json',credentials='c:/keys/out.json')
#    account.redirect_uri = 'https://localhost'
#    http://localhost:8080
#    account = searchconsole.authenticate(client_config=keysdir+'client_secret copy.json',credentials=keysdir+'credentials copy.json')
    print(account.webproperties)
#    sys.exit()

#    webproperty = account['https://ipromise.com.tw/']
#    webproperty = account['https://'+pname+'.face8ook.org/']
#    webproperty = account['https://www.damanwoo.com/']
#    webproperty = account['https://hhh.com.tw/']
    webproperty = account['https://innews.com.tw/']

#    report=webproperty.query.range('2021-03-01', '2021-06-17').dimension('page','query').get()
#    report=webproperty.query.range('2021-06-01', '2021-06-17').dimension('page','query').get()
#    report=webproperty.query.range('2020-06-01', '2021-06-22').dimension('page','query').filter('page', '/designers/cases/(491|31|293|278|31|24|594|356|307|491|33|385)', 'equals').get()
#    report=webproperty.query.range('2020-03-01', '2021-06-22').dimension('page','query').filter('page', '/designers/cases/'+pgnum, 'contains').get()
    report = webproperty.query.range('2022-04-01', '2022-04-16').dimension('page', 'query').get()

    # Group queries by page URL; with dimension('page','query'),
    # r[0] is the page URL and r[1] is the search query.
    urlq = {}
    for r in report.rows:
        if urlq.get(r[0]) is None:
            urlq[r[0]] = [r[1]]
        else:
            urlq[r[0]].append(r[1])
#    print(urlq)

    allrows = []
    for k, v in urlq.items():
        for q in v:
            elmts = q.split(' ')
            for elmt in elmts:
                table.insert({'q': elmt, 'rid': rid, 'url': k})
        rid += 1
        allrows.append([k, v])  # was [r[0], r[1]], which reused a stale loop variable
    db.commit()
 
def gen_pic():
    """Build a graph linking query tokens to their rid groups from the
    tmp table and render it as an interactive pyvis HTML page."""
    global db
    G = None
#    if os.path.exists(pname):
#        G = pickle.load( open( pname, "rb" ) )
#    else:
#        G = nx.Graph()
    G = nx.Graph()
    finallist = []

#    cursor=db.query('select q,rid,url from tmp where q in (select distinct q from (select q,count(url) from tmp where length(q)> 2 group by q having count(url) <= 3) as tbl1 ) order by q')
#    cursor=db.query('select q,rid,url from tmp where q in (select distinct q from (select q,count(url) from tmp where length(q)> 2 group by q ) as tbl1 ) order by q')
#    cursor=db.query('select q,rid,url from tmp where q in (select distinct q from (select q,count(url) from tmp where length(q)> 2 group by q having count(url) >=3) as tbl1 ) order by q')
#    cursor=db.query('select q,rid,url from tmp where rid in (select distinct rid from (select rid,count(q) from tmp where length(q)> 2 group by rid having count(q) >=15) as tbl1 ) order by q')
    # Keep only rid groups with at least 50 distinct tokens longer than 2 characters.
    cursor = db.query('select distinct q,rid from tmp where rid in (select distinct rid from (select rid,count(q) from tmp where length(q)> 2 group by rid having count(q) >=50) as tbl1 ) order by q')

    riddict = {}
    prev = ''
    curnode = ''
    cururl = ''
    total_idx = 0
    cnt = 0
    for c in cursor:
        print(str(c['rid']) + ":" + c['q'])
        G.add_edge(c['q'], str(c['rid']), weight=3, width=3, borderwidth=3)
        cnt += 1
    print(cnt)
#    pickle.dump( G, open( pname, "wb" ) )

    # Drop self-loops, then any nodes left without edges.
    remove = []
    G.remove_edges_from(nx.selfloop_edges(G))
    G.remove_nodes_from(list(nx.isolates(G)))
    G2 = G

    pyG = Network(height="600px", width="100%", bgcolor="#444444", font_color="white")
    pyG.from_nx(G2)
    pyG.show(pname + '.html')
#    modify_file(pname+'.html')
    return finallist
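
# Optional persistence helpers: a minimal sketch of the pickle round-trip
# hinted at by the commented-out load/dump lines in gen_pic(). The
# save_graph/load_graph names and the default path (pname) are assumptions,
# not part of the original workflow.
def save_graph(G, path=pname):
    with open(path, 'wb') as fw:
        pickle.dump(G, fw)

def load_graph(path=pname):
    with open(path, 'rb') as fr:
        return pickle.load(fr)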
 
#    cursor=db.query('(select q from (select q,count(url) from tmp where length(q)> 2 group by q having count(url) <= 3) as tbl1 )')

#destroy_db()
#checkig('12')
gen_pic()
 
 