|
@@ -1,10 +1,10 @@
|
|
|
from selenium import webdriver
|
|
|
-import random
|
|
|
+from random import randint
|
|
|
import time
|
|
|
from requests.cookies import RequestsCookieJar
|
|
|
import requests
|
|
|
from requests.adapters import HTTPAdapter
|
|
|
-from bs4 import BeautifulSoup
|
|
|
+from bs4 import BeautifulSoup
|
|
|
import json
|
|
|
from selenium.webdriver.chrome.options import Options
|
|
|
from selenium.webdriver.chrome.service import Service
|
|
@@ -13,9 +13,10 @@ from selenium.webdriver.support.wait import WebDriverWait
|
|
|
from selenium.webdriver.support import expected_conditions as ec
|
|
|
import dataset
|
|
|
import pymysql
|
|
|
+pymysql.install_as_MySQLdb()
|
|
|
|
|
|
|
|
|
-account = ['enjoylisteningswift']
|
|
|
+account = ['chenlinrain']
|
|
|
pd = ['weareyoung12']
|
|
|
|
|
|
path = '/Users/zooeytsai/Downloads/chromedriver'
|
|
@@ -25,7 +26,7 @@ options = Options()
|
|
|
options.add_argument("--headless")
|
|
|
|
|
|
def download_post():
|
|
|
- hashtag = '台菜餐廳'
|
|
|
+ hashtag = '寵物'
|
|
|
url = f"https://www.instagram.com/explore/tags/{hashtag}/"
|
|
|
s = Service(path)
|
|
|
driver = webdriver.Chrome(service=s)
|
|
@@ -61,19 +62,20 @@ def download_post():
|
|
|
driver.implicitly_wait(3)
|
|
|
number_of_posts+=1
|
|
|
if len(link)>number_of_posts:
|
|
|
- time.sleep(random(180,300))
|
|
|
+ time.sleep(randint(180,300))
|
|
|
number_of_posts == 0
|
|
|
- if new_height == last_height:
|
|
|
+ if len(link)>10000:
|
|
|
break
|
|
|
|
|
|
driver.close()
|
|
|
print(len(link))
|
|
|
return link
|
|
|
|
|
|
-db = dataset.connect('mysql://root:jondae350@localhost:3306/ig_tags?charset=utf8mb4')
|
|
|
-table = db['ig_tags']
|
|
|
+
|
|
|
+
|
|
|
def hashtag():
|
|
|
hashtag = []
|
|
|
+ c=0
|
|
|
for url in download_post():
|
|
|
s = Service(path)
|
|
|
driver = webdriver.Chrome(service=s)
|
|
@@ -84,6 +86,10 @@ def hashtag():
|
|
|
for tag in hashtags:
|
|
|
print(tag.text)
|
|
|
hashtag.append(tag)
|
|
|
+ c+=1
|
|
|
+ if c>10:
|
|
|
+ time.sleep(3)
|
|
|
+ c=0
|
|
|
driver.close()
|
|
|
|
|
|
db = pymysql.connect(host='localhost',
|
|
@@ -93,7 +99,7 @@ def hashtag():
|
|
|
cur = db.cursor()
|
|
|
query = "ALTER TABLE ig_tags ADD taiwan_food TEXT(100)"
|
|
|
for i in hashtag:
|
|
|
- cur.execute('INSERT INTO ig_tags (taiwan_food) VALUES (%s)', i)
|
|
|
+ cur.execute('INSERT INTO ig_tags (taiwan_food) VALUES (%s)',i)
|
|
|
db.commit()
|
|
|
db.close()
|
|
|
return hashtag
|