3 år sedan · b63b239310
--- a/2000ece3-455d-4cb5-b20e-ad798cf8bf23.json
+++ b/2000ece3-455d-4cb5-b20e-ad798cf8bf23.json
@@ -0,0 +1,27 @@
 
				+
			
 
				+    {
			
 
				+       "access_token":"ya29.A0ARrdaM-8J7urxGZFJBi4jt4ORkrCofDLMI3SN_jNfFjt5HlRZnlQ_FCRqQNZupdR0HWhAgzOE92p-AjAaBpKwklGQGxM5m3byAjQsr8qHr237p1lsdWif0kffHt2wpNxowuy5UyrzxbrSsj0zmuzZ5JgthRd",
			
 
				+       "client_id":"184319941539-gdh6p4v400g0f5fj076bp7l3cf7vn7ha.apps.googleusercontent.com",
			
 
				+       "client_secret":"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",
			
 
				+       "refresh_token":"1//0e3GKAc0Xl1V1CgYIARAAGA4SNwF-L9IrMmaPcxVGEA6J-yYeVzx8l9C3op0oiga7Ouw-_b7hv3enOhPwEixoH7pY3efL_aY6cSw",
			
 
				+       "token_expiry":"2020-10-27T18:03:48Z",
			
 
				+       "token_uri":"https://accounts.google.com/o/oauth2/token",
			
 
				+       "user_agent":null,
			
 
				+       "revoke_uri":"https://oauth2.googleapis.com/revoke",
			
 
				+       "id_token":null,
			
 
				+       "id_token_jwt":null,
			
 
				+       "token_response":{
			
 
				+          "access_token":"1//0e3GKAc0Xl1V1CgYIARAAGA4SNwF-L9IrMmaPcxVGEA6J-yYeVzx8l9C3op0oiga7Ouw-_b7hv3enOhPwEixoH7pY3efL_aY6cSw",
			
 
				+          "expires_in": 3599,
			
 
				+          "scope":"https://www.googleapis.com/auth/youtube.upload",
			
 
				+          "token_type":"Bearer"
			
 
				+       },
			
 
				+       "scopes":[
			
 
				+          "https://www.googleapis.com/auth/youtube.upload"
			
 
				+       ],
			
 
				+       "token_info_uri":"https://oauth2.googleapis.com/tokeninfo",
			
 
				+       "invalid":false,
			
 
				+       "_class":"OAuth2Credentials",
			
 
				+       "_module":"oauth2client.client"
			
 
				+    }
			
 
				+            
			
--- a/ig_tags/ig_selenium.py
+++ b/ig_tags/ig_selenium.py
@@ -11,97 +11,135 @@ from selenium.webdriver.chrome.service import Service
 
				 from selenium.webdriver.common.by import By
			
 
				 from selenium.webdriver.support.wait import WebDriverWait
			
 
				 from selenium.webdriver.support import expected_conditions as ec
			
 
				-import dataset
			
 
				+# import dataset
			
 
				+import ast
			
 
				 import pymysql
			
 
				+
			
 
				 pymysql.install_as_MySQLdb()
			
 
				 
			
 
				+import pd
			
 
				+import setting
			
 
				+
			
 
				 
			
 
				-account = ['chenlinrain']
			
 
				-pd = ['weareyoung12']
			
 
				+account = ['chenlinrain','enjoylisteningswift','novak_goodenough']
			
 
				+
			
 
				+account = pd.account
			
 
				+password = pd.password
			
 
				 
			
 
				 path = '/Users/zooeytsai/Downloads/chromedriver'
			
 
				 sbaccount = account[2]
			
 
				-sbpd = pd[1]
			
 
				-options = Options()
			
 
				-options.add_argument("--headless")
			
 
				+sbpd = password[1]
			
 
				+options = webdriver.ChromeOptions()
			
 
				+# options.add_argument("--headless") # 視窗背後執行
			
 
				+options.add_argument("user-agent=%s" % rua())
			
 
				+
			
 
				 
			
 
				 def download_post():
			
 
				-    hashtag = '寵物'
			
 
				+    hashtag = '寵物蛋糕'
			
 
				     url = f"https://www.instagram.com/explore/tags/{hashtag}/"
			
 
				     s = Service(path)
			
 
				-    driver = webdriver.Chrome(service=s)
			
 
				+    driver = webdriver.Chrome(service=s, options=options)
			
 
				     driver.implicitly_wait(3)
			
 
				     driver.get('https://www.instagram.com/')
			
 
				     time.sleep(5)
			
 
				-    driver.find_element(By.NAME,'username').send_keys(sbaccount)
			
 
				-    driver.find_element(By.NAME,'password').send_keys(sbpd)
			
 
				+    driver.find_element(By.NAME, 'username').send_keys(sbaccount)
			
 
				+    driver.find_element(By.NAME, 'password').send_keys(sbpd)
			
 
				     time.sleep(3)
			
 
				-    driver.find_element(By.XPATH,'//*[@id="loginForm"]/div/div[3]/button').click()  # 登入
			
 
				-    driver.implicitly_wait(3)
			
 
				-    driver.find_element(By.XPATH,'//*[@id="react-root"]/section/main/div/div/div/section/div/button').click()
			
 
				+    driver.find_element(By.XPATH, '//*[@id="loginForm"]/div/div[3]/button').click()  # 登入
			
 
				+    time.sleep(3)
			
 
				+    driver.find_element(By.XPATH, '//*[@id="react-root"]/section/main/div/div/div/section/div/button').click()
			
 
				     time.sleep(3)
			
 
				-    cookie = driver.get_cookies()
			
 
				-    jar = RequestsCookieJar()
			
 
				-    for i in cookie:
			
 
				-        jar.set(i['name'], i['value'])
			
 
				     driver.get(url)
			
 
				     time.sleep(3)
			
 
				-    number_of_posts = 100
			
 
				-    wait = WebDriverWait(driver, 10)
			
 
				-    last_height = driver.execute_script(
			
 
				-        "window.scrollTo(0, document.body.scrollHeight);var scrolldown=document.body.scrollHeight;return scrolldown;")
			
 
				+    limit_of_posts = 10
			
 
				+    limit_of_scroll = 100
			
 
				+    c = 0
			
 
				+    c_sroll = 0
			
 
				     link = []
			
 
				+    
			
 
				     while True:
			
 
				-        href_element = wait.until(ec.visibility_of_all_elements_located((By.XPATH, "//a[@href]")))
			
 
				-        for i in href_element:
			
 
				-            link.append(i.get_attribute('href'))
			
 
				-        print(link)
			
 
				+        print(c, c_sroll)
			
 
				         driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
			
 
				         time.sleep(3)
			
 
				-        new_height = driver.execute_script("return document.body.scrollHeight")
			
 
				-        driver.implicitly_wait(3)
			
 
				-        number_of_posts+=1
			
 
				-        if len(link)>number_of_posts:
			
 
				-            time.sleep(randint(180,300))
			
 
				-            number_of_posts == 0
			
 
				-        if len(link)>10000:
			
 
				+        html = driver.page_source
			
 
				+        soup = BeautifulSoup(html, 'lxml')
			
 
				+        for elem in soup.select('article div div div div a'):
			
 
				+            print(elem['href'])
			
 
				+            if elem['href'] not in link:
			
 
				+                link.append(elem['href'])
			
 
				+        c += 1
			
 
				+        c_sroll += 1
			
 
				+        if c > limit_of_posts:
			
 
				+            print('中場休息')
			
 
				+            time.sleep(randint(15, 30))
			
 
				+            c = 0
			
 
				+        if c_sroll > limit_of_scroll:
			
 
				             break
			
 
				-
			
 
				+        print(len(link))
			
 
				     driver.close()
			
 
				     print(len(link))
			
 
				-    return link
			
 
				-
			
 
				+    with open('link.txt', 'w') as f:
			
 
				+        f.write(str(link))
			
 
				+    return link, hashtag
			
 
				 
			
 
				 
			
 
				 def hashtag():
			
 
				-    hashtag = []
			
 
				-    c=0
			
 
				-    for url in download_post():
			
 
				-        s = Service(path)
			
 
				-        driver = webdriver.Chrome(service=s)
			
 
				-        driver.implicitly_wait(3)
			
 
				-        driver.get(url)
			
 
				+    input_hashtag = 'pet_cake'
			
 
				+    collect_hashtag = []
			
 
				+    c = 0
			
 
				+    f = open('link.txt', 'r')
			
 
				+    result = f.read()
			
 
				+    link = ast.literal_eval(result)
			
 
				+    s = Service(path)
			
 
				+    driver = webdriver.Chrome(service=s, options=options)
			
 
				+    driver.implicitly_wait(3)
			
 
				+    driver.get('https://www.instagram.com/')
			
 
				+    time.sleep(5)
			
 
				+    driver.find_element(By.NAME, 'username').send_keys(sbaccount)
			
 
				+    driver.find_element(By.NAME, 'password').send_keys(sbpd)
			
 
				+    time.sleep(3)
			
 
				+    
			
 
				+    driver.find_element(By.XPATH, '//*[@id="loginForm"]/div/div[3]/button').click()  # 登入
			
 
				+    driver.implicitly_wait(3)
			
 
				+    
			
 
				+    driver.find_element(By.XPATH, '//*[@id="react-root"]/section/main/div/div/div/section/div/button').click()
			
 
				+    time.sleep(3)
			
 
				+    
			
 
				+    for url in link:
			
 
				+        driver.get(f"https://www.instagram.com/{url}")
			
 
				         soup = BeautifulSoup(driver.page_source, 'html.parser')
			
 
				         hashtags = soup.find_all('a', class_='xil3i')
			
 
				         for tag in hashtags:
			
 
				             print(tag.text)
			
 
				-            hashtag.append(tag)
			
 
				-        c+=1
			
 
				-        if c>10:
			
 
				-            time.sleep(3)
			
 
				-            c=0
			
 
				+            collect_hashtag.append(tag.text)
			
 
				+        c += 1
			
 
				+        if c > 10:
			
 
				+            time.sleep(randint(5, 10))
			
 
				+            c = 0
			
 
				     driver.close()
			
 
				-
			
 
				-    db = pymysql.connect(host='localhost',
			
 
				-                         user='root',
			
 
				-                         password='jondae350',
			
 
				-                         database='ig_tags')
			
 
				-    cur = db.cursor()
			
 
				-    query = "ALTER TABLE ig_tags ADD taiwan_food TEXT(100)"
			
 
				-    for i in hashtag:
			
 
				-        cur.execute('INSERT INTO ig_tags (taiwan_food) VALUES (%s)',i)
			
 
				-    db.commit()
			
 
				-    db.close()
			
 
				+    print(collect_hashtag)
			
 
				+    db_company = pymysql.connect(host='db.ptt.cx',
			
 
				+                                 user='choozmo',
			
 
				+                                 password='pAssw0rd',
			
 
				+                                 database='seo')
			
 
				+    cur = db_company.cursor()
			
 
				+    query_new_col = f"ALTER TABLE seo.ig_pet ADD COLUMN {input_hashtag} VARCHAR(45) NULL"
			
 
				+    cur.execute(query_new_col)
			
 
				+    id_number = 0
			
 
				+    cur.execute("select * from seo.ig_pet order by `index` desc limit 1")
			
 
				+    last_id = cur.fetchall()[0][0]
			
 
				+    insert_row = len(collect_hashtag) - last_id
			
 
				+    for i in range(0, insert_row):
			
 
				+        query_insert = f"INSERT INTO seo.ig_pet ({input_hashtag}) VALUES ('')"
			
 
				+        cur.execute(query_insert)
			
 
				+    for i in collect_hashtag:
			
 
				+        i = i.replace('#', '')
			
 
				+        query_update = f"UPDATE seo.ig_pet SET {input_hashtag}='{i}' where `index`='{id_number}'"
			
 
				+        cur.execute(query_update)
			
 
				+        id_number += 1
			
 
				+    
			
 
				+    db_company.commit()
			
 
				+    db_company.close()
			
 
				     return hashtag
			
 
				 
			
 
				 
			
--- a/ig_tags/pd.py
+++ b/ig_tags/pd.py
@@ -0,0 +1,2 @@
 
				+account = ['chenlinrain','enjoylisteningswift','novak_goodenough']
			
 
				+password = ['weareyoung12']
			
--- a/ig_tags/setting.py
+++ b/ig_tags/setting.py
@@ -0,0 +1,13 @@
 
				+import random
			
 
				+
			
 
				+def rua():
			
 
				+    pool = [
			
 
				+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:73.0) Gecko/20100101 Firefox/73.0",
			
 
				+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0",
			
 
				+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
			
 
				+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36",
			
 
				+        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
			
 
				+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36",
			
 
				+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 OPR/68.0.3618.125",
			
 
				+    ]
			
 
				+    return random.choice(pool)
			
--- a/ig_tags/ig_tags.py
+++ b/ig_tags/ig_tags.py