| 
					
				 | 
			
			
				@@ -1,4 +1,3 @@ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-# coding:utf-8 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from selenium import webdriver 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from random import randint 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import time 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -16,6 +15,17 @@ import pandas 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import os 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import ast 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import pymysql 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+<<<<<<< HEAD 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+pymysql.install_as_MySQLdb() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import setting 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+account = ['chenlinrain','enjoylisteningswift','novak_goodenough'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+account = 'chenlinrain' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+password = 'weareyoung12' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+======= 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 pymysql.install_as_MySQLdb() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import pd 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -23,25 +33,41 @@ from setting import rua 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 account = ['chenlinrain', 'aruikuwasaki', 'enjoylisteningswift', 'novak_goodenough'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 password = ['weareyoung12'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+>>>>>>> 604b4f0737fe7f055de28e84a0a4697a346701cf 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 path = 'C:\\Users\\user\\Downloads\\chromedriver_99\\chromedriver' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 options = webdriver.ChromeOptions() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+<<<<<<< HEAD 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+options.add_argument("--headless") # 視窗背後執行 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+options.add_argument("user-agent=%s" % setting.rua()) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+print(setting.rua()) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+======= 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 options.add_argument("--headless")  # 視窗背後執行 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 options.add_argument("user-agent=%s" % rua()) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+>>>>>>> 604b4f0737fe7f055de28e84a0a4697a346701cf 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 hashtag = '上課平台' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 def download_post(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+<<<<<<< HEAD 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    sbaccount = 'chenlinrain' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    sbpd = password 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    hashtag = '實體課' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+======= 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+>>>>>>> 604b4f0737fe7f055de28e84a0a4697a346701cf 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     url = f"https://www.instagram.com/explore/tags/{hashtag}/" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     s = Service(path) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    driver = webdriver.Chrome(service=s, options=options) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    driver = webdriver.Chrome(executable_path=path, options=options) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     driver.implicitly_wait(3) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     driver.get('https://www.instagram.com/') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     time.sleep(5) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+<<<<<<< HEAD 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+======= 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     sbaccount = account[4] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     sbpd = password[0] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+>>>>>>> 604b4f0737fe7f055de28e84a0a4697a346701cf 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     driver.find_element(By.NAME, 'username').send_keys(sbaccount) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     driver.find_element(By.NAME, 'password').send_keys(sbpd) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     time.sleep(3) 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -52,11 +78,11 @@ def download_post(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     driver.get(url) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     time.sleep(3) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     limit_of_posts = 10 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    limit_of_scroll = 100 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    limit_of_scroll = 40 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     c = 0 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     c_sroll = 0 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     link = [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    last_height = driver.execute_script("return document.body.scrollHeight") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+     
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     while True: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         print(c, c_sroll) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         driver.execute_script("window.scrollTo(0, document.body.scrollHeight);") 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -69,6 +95,17 @@ def download_post(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 link.append(elem['href']) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         c += 1 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         c_sroll += 1 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+<<<<<<< HEAD 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        if c > limit_of_posts: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            print('中場休息') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            time.sleep(randint(15, 30)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            c = 0 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        if c_sroll > limit_of_scroll: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            break 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        print(len(link)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    driver.close() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    print(len(link)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+======= 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         new_height = driver.execute_script("return document.body.scrollHeight") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         if new_height == last_height: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             print('沒有更多文章了') 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -82,6 +119,7 @@ def download_post(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				      
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     driver.close() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     print('post總數', len(link)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+>>>>>>> 604b4f0737fe7f055de28e84a0a4697a346701cf 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     with open('link.txt', 'w') as f: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         f.write(str(link)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     return link, hashtag 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -96,6 +134,12 @@ input_hashtag = '' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 def hashtag(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+<<<<<<< HEAD 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    sbaccount = 'novak_goodenough' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    sbpd = password 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    input_hashtag = 'face_to_face' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+======= 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+>>>>>>> 604b4f0737fe7f055de28e84a0a4697a346701cf 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     collect_hashtag = [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     c = 0 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     rest = 0 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -103,12 +147,19 @@ def hashtag(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     result = f.read() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     link = ast.literal_eval(result) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     s = Service(path) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+<<<<<<< HEAD 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    driver = webdriver.Chrome(executable_path=path, options=options) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    driver.implicitly_wait(3) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    driver.get('https://www.instagram.com/') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    time.sleep(5) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+======= 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     driver = webdriver.Chrome(service=s, options=options) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     driver.implicitly_wait(3) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     driver.get('https://www.instagram.com/') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     time.sleep(5) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     sbaccount = account[1] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     sbpd = password[0] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+>>>>>>> 604b4f0737fe7f055de28e84a0a4697a346701cf 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     driver.find_element(By.NAME, 'username').send_keys(sbaccount) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     driver.find_element(By.NAME, 'password').send_keys(sbpd) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     time.sleep(3) 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -119,14 +170,36 @@ def hashtag(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     driver.find_element(By.XPATH, '//*[@id="react-root"]/section/main/div/div/div/section/div/button').click() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     time.sleep(3) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				      
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+<<<<<<< HEAD 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    for i,url in enumerate(link): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+======= 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     for i, url in enumerate(link): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+>>>>>>> 604b4f0737fe7f055de28e84a0a4697a346701cf 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         print(i) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         driver.get(f"https://www.instagram.com/{url}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         soup = BeautifulSoup(driver.page_source, 'html.parser') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         hashtags = soup.find_all('a', class_='xil3i') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         for tag in hashtags: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            collect_hashtag.append(tag.text) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             print(tag.text) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+<<<<<<< HEAD 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            collect_hashtag.append(tag.text) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        c += 1 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        rest+=1 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        if c > 10: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            time.sleep(randint(5, 10)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            c = 0 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        if rest>100: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            time.sleep(randint(60,90)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            rest=0 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    driver.close() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    print(collect_hashtag) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+     
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    db_company = pymysql.connect(host='db.ptt.cx', 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                 user='choozmo', 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                 password='pAssw0rd', 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                 database='seo') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    print('開始開始寫進db') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+======= 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         c += 1 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         rest += 1 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         if c > 10: 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -139,6 +212,7 @@ def hashtag(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     print(collect_hashtag) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     print('開始寫入db') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				      
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+>>>>>>> 604b4f0737fe7f055de28e84a0a4697a346701cf 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     cur = db_company.cursor() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     query_new_col = f"ALTER TABLE seo.ig_pet_2 ADD COLUMN {input_hashtag} VARCHAR(45) NULL" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     cur.execute(query_new_col) 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -153,13 +227,21 @@ def hashtag(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         print(i) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     for i in collect_hashtag: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         i = i.replace('#', '') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+<<<<<<< HEAD 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+======= 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         print(i, id_number) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+>>>>>>> 604b4f0737fe7f055de28e84a0a4697a346701cf 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         query_update = f"UPDATE seo.ig_pet_2 SET {input_hashtag}='{i}' where `id`='{id_number}'" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         cur.execute(query_update) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        db_company.commit() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         id_number += 1 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+<<<<<<< HEAD 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        db_company.commit() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        print(i,id_number) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+======= 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     print('post總數', len(link)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+>>>>>>> 604b4f0737fe7f055de28e84a0a4697a346701cf 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     db_company.close() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    print(len(link)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     return hashtag 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 |