|
@@ -1,4 +1,3 @@
|
|
|
-# coding:utf-8
|
|
|
from selenium import webdriver
|
|
|
from random import randint
|
|
|
import time
|
|
@@ -16,6 +15,17 @@ import pandas
|
|
|
import os
|
|
|
import ast
|
|
|
import pymysql
|
|
|
+<<<<<<< HEAD
|
|
|
+
|
|
|
+pymysql.install_as_MySQLdb()
|
|
|
+import setting
|
|
|
+
|
|
|
+
|
|
|
+account = ['chenlinrain','enjoylisteningswift','novak_goodenough']
|
|
|
+
|
|
|
+account = 'chenlinrain'
|
|
|
+password = 'weareyoung12'
|
|
|
+=======
|
|
|
|
|
|
pymysql.install_as_MySQLdb()
|
|
|
import pd
|
|
@@ -23,25 +33,41 @@ from setting import rua
|
|
|
|
|
|
account = ['chenlinrain', 'aruikuwasaki', 'enjoylisteningswift', 'novak_goodenough']
|
|
|
password = ['weareyoung12']
|
|
|
+>>>>>>> 604b4f0737fe7f055de28e84a0a4697a346701cf
|
|
|
|
|
|
path = 'C:\\Users\\user\\Downloads\\chromedriver_99\\chromedriver'
|
|
|
|
|
|
options = webdriver.ChromeOptions()
|
|
|
+<<<<<<< HEAD
|
|
|
+options.add_argument("--headless") # 視窗背後執行
|
|
|
+options.add_argument("user-agent=%s" % setting.rua())
|
|
|
+print(setting.rua())
|
|
|
+=======
|
|
|
options.add_argument("--headless") # 視窗背後執行
|
|
|
options.add_argument("user-agent=%s" % rua())
|
|
|
+>>>>>>> 604b4f0737fe7f055de28e84a0a4697a346701cf
|
|
|
|
|
|
hashtag = '上課平台'
|
|
|
|
|
|
|
|
|
def download_post():
|
|
|
+<<<<<<< HEAD
|
|
|
+ sbaccount = 'chenlinrain'
|
|
|
+ sbpd = password
|
|
|
+ hashtag = '實體課'
|
|
|
+=======
|
|
|
+>>>>>>> 604b4f0737fe7f055de28e84a0a4697a346701cf
|
|
|
url = f"https://www.instagram.com/explore/tags/{hashtag}/"
|
|
|
s = Service(path)
|
|
|
- driver = webdriver.Chrome(service=s, options=options)
|
|
|
+ driver = webdriver.Chrome(executable_path=path, options=options)
|
|
|
driver.implicitly_wait(3)
|
|
|
driver.get('https://www.instagram.com/')
|
|
|
time.sleep(5)
|
|
|
+<<<<<<< HEAD
|
|
|
+=======
|
|
|
sbaccount = account[4]
|
|
|
sbpd = password[0]
|
|
|
+>>>>>>> 604b4f0737fe7f055de28e84a0a4697a346701cf
|
|
|
driver.find_element(By.NAME, 'username').send_keys(sbaccount)
|
|
|
driver.find_element(By.NAME, 'password').send_keys(sbpd)
|
|
|
time.sleep(3)
|
|
@@ -52,11 +78,11 @@ def download_post():
|
|
|
driver.get(url)
|
|
|
time.sleep(3)
|
|
|
limit_of_posts = 10
|
|
|
- limit_of_scroll = 100
|
|
|
+ limit_of_scroll = 40
|
|
|
c = 0
|
|
|
c_sroll = 0
|
|
|
link = []
|
|
|
- last_height = driver.execute_script("return document.body.scrollHeight")
|
|
|
+
|
|
|
while True:
|
|
|
print(c, c_sroll)
|
|
|
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
|
|
@@ -69,6 +95,17 @@ def download_post():
|
|
|
link.append(elem['href'])
|
|
|
c += 1
|
|
|
c_sroll += 1
|
|
|
+<<<<<<< HEAD
|
|
|
+ if c > limit_of_posts:
|
|
|
+ print('中場休息')
|
|
|
+ time.sleep(randint(15, 30))
|
|
|
+ c = 0
|
|
|
+ if c_sroll > limit_of_scroll:
|
|
|
+ break
|
|
|
+ print(len(link))
|
|
|
+ driver.close()
|
|
|
+ print(len(link))
|
|
|
+=======
|
|
|
new_height = driver.execute_script("return document.body.scrollHeight")
|
|
|
if new_height == last_height:
|
|
|
print('沒有更多文章了')
|
|
@@ -82,6 +119,7 @@ def download_post():
|
|
|
|
|
|
driver.close()
|
|
|
print('post總數', len(link))
|
|
|
+>>>>>>> 604b4f0737fe7f055de28e84a0a4697a346701cf
|
|
|
with open('link.txt', 'w') as f:
|
|
|
f.write(str(link))
|
|
|
return link, hashtag
|
|
@@ -96,6 +134,12 @@ input_hashtag = ''
|
|
|
|
|
|
|
|
|
def hashtag():
|
|
|
+<<<<<<< HEAD
|
|
|
+ sbaccount = 'novak_goodenough'
|
|
|
+ sbpd = password
|
|
|
+ input_hashtag = 'face_to_face'
|
|
|
+=======
|
|
|
+>>>>>>> 604b4f0737fe7f055de28e84a0a4697a346701cf
|
|
|
collect_hashtag = []
|
|
|
c = 0
|
|
|
rest = 0
|
|
@@ -103,12 +147,19 @@ def hashtag():
|
|
|
result = f.read()
|
|
|
link = ast.literal_eval(result)
|
|
|
s = Service(path)
|
|
|
+<<<<<<< HEAD
|
|
|
+ driver = webdriver.Chrome(executable_path=path, options=options)
|
|
|
+ driver.implicitly_wait(3)
|
|
|
+ driver.get('https://www.instagram.com/')
|
|
|
+ time.sleep(5)
|
|
|
+=======
|
|
|
driver = webdriver.Chrome(service=s, options=options)
|
|
|
driver.implicitly_wait(3)
|
|
|
driver.get('https://www.instagram.com/')
|
|
|
time.sleep(5)
|
|
|
sbaccount = account[1]
|
|
|
sbpd = password[0]
|
|
|
+>>>>>>> 604b4f0737fe7f055de28e84a0a4697a346701cf
|
|
|
driver.find_element(By.NAME, 'username').send_keys(sbaccount)
|
|
|
driver.find_element(By.NAME, 'password').send_keys(sbpd)
|
|
|
time.sleep(3)
|
|
@@ -119,14 +170,36 @@ def hashtag():
|
|
|
driver.find_element(By.XPATH, '//*[@id="react-root"]/section/main/div/div/div/section/div/button').click()
|
|
|
time.sleep(3)
|
|
|
|
|
|
+<<<<<<< HEAD
|
|
|
+ for i,url in enumerate(link):
|
|
|
+=======
|
|
|
for i, url in enumerate(link):
|
|
|
+>>>>>>> 604b4f0737fe7f055de28e84a0a4697a346701cf
|
|
|
print(i)
|
|
|
driver.get(f"https://www.instagram.com/{url}")
|
|
|
soup = BeautifulSoup(driver.page_source, 'html.parser')
|
|
|
hashtags = soup.find_all('a', class_='xil3i')
|
|
|
for tag in hashtags:
|
|
|
- collect_hashtag.append(tag.text)
|
|
|
print(tag.text)
|
|
|
+<<<<<<< HEAD
|
|
|
+ collect_hashtag.append(tag.text)
|
|
|
+ c += 1
|
|
|
+ rest+=1
|
|
|
+ if c > 10:
|
|
|
+ time.sleep(randint(5, 10))
|
|
|
+ c = 0
|
|
|
+ if rest>100:
|
|
|
+ time.sleep(randint(60,90))
|
|
|
+ rest=0
|
|
|
+ driver.close()
|
|
|
+ print(collect_hashtag)
|
|
|
+
|
|
|
+ db_company = pymysql.connect(host='db.ptt.cx',
|
|
|
+ user='choozmo',
|
|
|
+ password='pAssw0rd',
|
|
|
+ database='seo')
|
|
|
+ print('開始開始寫進db')
|
|
|
+=======
|
|
|
c += 1
|
|
|
rest += 1
|
|
|
if c > 10:
|
|
@@ -139,6 +212,7 @@ def hashtag():
|
|
|
print(collect_hashtag)
|
|
|
print('開始寫入db')
|
|
|
|
|
|
+>>>>>>> 604b4f0737fe7f055de28e84a0a4697a346701cf
|
|
|
cur = db_company.cursor()
|
|
|
query_new_col = f"ALTER TABLE seo.ig_pet_2 ADD COLUMN {input_hashtag} VARCHAR(45) NULL"
|
|
|
cur.execute(query_new_col)
|
|
@@ -153,13 +227,21 @@ def hashtag():
|
|
|
print(i)
|
|
|
for i in collect_hashtag:
|
|
|
i = i.replace('#', '')
|
|
|
+<<<<<<< HEAD
|
|
|
+=======
|
|
|
print(i, id_number)
|
|
|
+>>>>>>> 604b4f0737fe7f055de28e84a0a4697a346701cf
|
|
|
query_update = f"UPDATE seo.ig_pet_2 SET {input_hashtag}='{i}' where `id`='{id_number}'"
|
|
|
cur.execute(query_update)
|
|
|
- db_company.commit()
|
|
|
id_number += 1
|
|
|
+<<<<<<< HEAD
|
|
|
+ db_company.commit()
|
|
|
+ print(i,id_number)
|
|
|
+=======
|
|
|
print('post總數', len(link))
|
|
|
+>>>>>>> 604b4f0737fe7f055de28e84a0a4697a346701cf
|
|
|
db_company.close()
|
|
|
+ print(len(link))
|
|
|
return hashtag
|
|
|
|
|
|
|