zooeytsai 3 år sedan
förälder
incheckning
6b03165551
1 ändrade filer med 36 tillägg och 30 borttagningar
  1. 36 30
      ig_tags/ig_selenium.py

+ 36 - 30
ig_tags/ig_selenium.py

@@ -11,26 +11,28 @@ from selenium.webdriver.chrome.service import Service
 from selenium.webdriver.common.by import By
 from selenium.webdriver.support.wait import WebDriverWait
 from selenium.webdriver.support import expected_conditions as ec
-import dataset
+# import dataset
 import pymysql
 pymysql.install_as_MySQLdb()
+import pd
+from setting import rua
 
-
-account = ['liftwotsai','chenlinrain','enjoylisteningswift']
-pd = ['obl787iviate634','weareyoung12']
+account = pd.account
+password = pd.password
 
 path = '/Users/zooeytsai/Downloads/chromedriver'
 sbaccount = account[1]
-sbpd = pd[1]
-options = Options()
-options.add_argument("--headless")
+sbpd = password[1]
+options = webdriver.ChromeOptions()
+# options.add_argument("--headless") # 視窗背後執行
+options.add_argument("user-agent=%s" % rua())
 
 
 def download_post():
-    hashtag = '寵物零食'
+    hashtag = '寵物'
     url = f"https://www.instagram.com/explore/tags/{hashtag}/"
     s = Service(path)
-    driver = webdriver.Chrome(service=s)
+    driver = webdriver.Chrome(service=s, options=options)
     driver.implicitly_wait(3)
     driver.get('https://www.instagram.com/')
     time.sleep(5)
@@ -41,14 +43,10 @@ def download_post():
     driver.implicitly_wait(3)
     driver.find_element(By.XPATH,'//*[@id="react-root"]/section/main/div/div/div/section/div/button').click()
     time.sleep(3)
-    cookie = driver.get_cookies()
-    jar = RequestsCookieJar()
-    for i in cookie:
-        jar.set(i['name'], i['value'])
     driver.get(url)
     time.sleep(3)
-    limit_of_posts = 10
-    limit_of_scroll = 250
+    limit_of_posts = 1
+    limit_of_scroll = 3
     c = 0
     c_sroll=0
     wait = WebDriverWait(driver, 10)
@@ -70,10 +68,8 @@ def download_post():
         c_sroll+=1
         if c > limit_of_posts:
             print('中場休息')
-            time.sleep(randint(30,60))
+            time.sleep(randint(15,30))
             c=0
-        # if len(link)>500:
-        #     break
         if c_sroll>limit_of_scroll:
             break
     html = driver.page_source
@@ -83,16 +79,16 @@ def download_post():
             link.append(elem['href'])
     driver.close()
     print(len(link))
-    return link
-
+    return link, hashtag
 
 
 def hashtag():
-    hashtag = []
+    input_hashtag = 'pilo'
+    collect_hashtag = []
     c=0
-    result = download_post()
+    result = download_post()[0]
     s = Service(path)
-    driver = webdriver.Chrome(service=s)
+    driver = webdriver.Chrome(service=s,options=options)
     driver.implicitly_wait(3)
     driver.get('https://www.instagram.com/')
     time.sleep(5)
@@ -109,23 +105,33 @@ def hashtag():
         hashtags = soup.find_all('a', class_='xil3i')
         for tag in hashtags:
             print(tag.text)
-            hashtag.append(tag.text)
+            collect_hashtag.append(tag.text)
         c+=1
         if c>10:
-            time.sleep(3)
+            time.sleep(randint(5,10))
             c=0
     driver.close()
-    print(hashtag)
+    print(collect_hashtag)
     db = pymysql.connect(host='localhost',
                          user='root',
                          password='jondae350',
                          database='ig_tags')
     cur = db.cursor()
-    query = "ALTER TABLE ig_tags ADD health_product TEXT(100)"
+    query_new_col = f"ALTER TABLE ig_tags.new_table ADD COLUMN {input_hashtag} VARCHAR(45) NULL"
+    cur.execute(query_new_col)
     id_number = 1
-    for i in hashtag:
-        cur.execute(f'UPDATE ig_tags set snack={i} where id={id_number}')
-        id_number+=1
+    cur.execute("select * from ig_tags.new_table order by id desc limit 1")
+    last_id = cur.fetchall()[0][0]
+    insert_row = len(collect_hashtag)-last_id
+    for i in range(0, insert_row):
+        query_insert = f"INSERT INTO ig_tags.new_table ({input_hashtag}) VALUES ('')"
+        cur.execute(query_insert)
+    for i in collect_hashtag:
+        i = i.replace('#', '')
+        query_update = f"UPDATE ig_tags.new_table SET {input_hashtag}='{i}' where id='{id_number}'"
+        cur.execute(query_update)
+        id_number += 1
+
     db.commit()
     db.close()
     return hashtag