|
@@ -20,13 +20,13 @@ account = ['liftwotsai','chenlinrain','enjoylisteningswift']
|
|
pd = ['obl787iviate634','weareyoung12']
|
|
pd = ['obl787iviate634','weareyoung12']
|
|
|
|
|
|
path = '/Users/zooeytsai/Downloads/chromedriver'
|
|
path = '/Users/zooeytsai/Downloads/chromedriver'
|
|
-sbaccount = account[2]
|
|
|
|
|
|
+sbaccount = account[1]
|
|
sbpd = pd[1]
|
|
sbpd = pd[1]
|
|
options = Options()
|
|
options = Options()
|
|
options.add_argument("--headless")
|
|
options.add_argument("--headless")
|
|
|
|
|
|
def download_post():
|
|
def download_post():
|
|
- hashtag = '寵物'
|
|
|
|
|
|
+ hashtag = '寵物零食'
|
|
url = f"https://www.instagram.com/explore/tags/{hashtag}/"
|
|
url = f"https://www.instagram.com/explore/tags/{hashtag}/"
|
|
s = Service(path)
|
|
s = Service(path)
|
|
driver = webdriver.Chrome(service=s)
|
|
driver = webdriver.Chrome(service=s)
|
|
@@ -46,13 +46,14 @@ def download_post():
|
|
jar.set(i['name'], i['value'])
|
|
jar.set(i['name'], i['value'])
|
|
driver.get(url)
|
|
driver.get(url)
|
|
time.sleep(3)
|
|
time.sleep(3)
|
|
- limit_of_posts = 100
|
|
|
|
|
|
+ limit_of_posts = 10
|
|
c = 0
|
|
c = 0
|
|
wait = WebDriverWait(driver, 10)
|
|
wait = WebDriverWait(driver, 10)
|
|
last_height = driver.execute_script(
|
|
last_height = driver.execute_script(
|
|
"window.scrollTo(0, document.body.scrollHeight);var scrolldown=document.body.scrollHeight;return scrolldown;")
|
|
"window.scrollTo(0, document.body.scrollHeight);var scrolldown=document.body.scrollHeight;return scrolldown;")
|
|
link = []
|
|
link = []
|
|
while True:
|
|
while True:
|
|
|
|
+ print(c)
|
|
href_element = wait.until(ec.visibility_of_all_elements_located((By.XPATH, "//a[@href]")))
|
|
href_element = wait.until(ec.visibility_of_all_elements_located((By.XPATH, "//a[@href]")))
|
|
for i in href_element:
|
|
for i in href_element:
|
|
link.append(i.get_attribute('href'))
|
|
link.append(i.get_attribute('href'))
|
|
@@ -63,9 +64,10 @@ def download_post():
|
|
driver.implicitly_wait(3)
|
|
driver.implicitly_wait(3)
|
|
c+=1
|
|
c+=1
|
|
if c > limit_of_posts:
|
|
if c > limit_of_posts:
|
|
- time.sleep(randint(180,300))
|
|
|
|
print('中場休息')
|
|
print('中場休息')
|
|
- if len(link)>10000:
|
|
|
|
|
|
+ time.sleep(randint(180,300))
|
|
|
|
+ c=0
|
|
|
|
+ if len(link)>5000:
|
|
break
|
|
break
|
|
|
|
|
|
driver.close()
|
|
driver.close()
|
|
@@ -77,7 +79,8 @@ def download_post():
|
|
def hashtag():
|
|
def hashtag():
|
|
hashtag = []
|
|
hashtag = []
|
|
c=0
|
|
c=0
|
|
- for url in download_post():
|
|
|
|
|
|
+ result = download_post()
|
|
|
|
+ for url in result:
|
|
s = Service(path)
|
|
s = Service(path)
|
|
driver = webdriver.Chrome(service=s)
|
|
driver = webdriver.Chrome(service=s)
|
|
driver.implicitly_wait(3)
|
|
driver.implicitly_wait(3)
|
|
@@ -92,7 +95,7 @@ def hashtag():
|
|
time.sleep(3)
|
|
time.sleep(3)
|
|
c=0
|
|
c=0
|
|
driver.close()
|
|
driver.close()
|
|
-
|
|
|
|
|
|
+ print(hashtag)
|
|
db = pymysql.connect(host='localhost',
|
|
db = pymysql.connect(host='localhost',
|
|
user='root',
|
|
user='root',
|
|
password='jondae350',
|
|
password='jondae350',
|
|
@@ -106,5 +109,5 @@ def hashtag():
|
|
return hashtag
|
|
return hashtag
|
|
|
|
|
|
|
|
|
|
-download_post()
|
|
|
|
|
|
+
|
|
hashtag()
|
|
hashtag()
|