|
@@ -11,26 +11,28 @@ from selenium.webdriver.chrome.service import Service
|
|
|
from selenium.webdriver.common.by import By
|
|
|
from selenium.webdriver.support.wait import WebDriverWait
|
|
|
from selenium.webdriver.support import expected_conditions as ec
|
|
|
-import dataset
|
|
|
+# import dataset
|
|
|
import pymysql
|
|
|
pymysql.install_as_MySQLdb()
|
|
|
+import pd
|
|
|
+from setting import rua
|
|
|
|
|
|
-
|
|
|
-account = ['liftwotsai','chenlinrain','enjoylisteningswift']
|
|
|
-pd = ['obl787iviate634','weareyoung12']
|
|
|
+account = pd.account
|
|
|
+password = pd.password
|
|
|
|
|
|
path = '/Users/zooeytsai/Downloads/chromedriver'
|
|
|
sbaccount = account[1]
|
|
|
-sbpd = pd[1]
|
|
|
-options = Options()
|
|
|
-options.add_argument("--headless")
|
|
|
+sbpd = password[1]
|
|
|
+options = webdriver.ChromeOptions()
|
|
|
+# options.add_argument("--headless")  # run the browser in the background (no visible window)
|
|
|
+options.add_argument("user-agent=%s" % rua())
|
|
|
|
|
|
|
|
|
def download_post():
|
|
|
- hashtag = '寵物零食'
|
|
|
+ hashtag = '寵物'
|
|
|
url = f"https://www.instagram.com/explore/tags/{hashtag}/"
|
|
|
s = Service(path)
|
|
|
- driver = webdriver.Chrome(service=s)
|
|
|
+ driver = webdriver.Chrome(service=s, options=options)
|
|
|
driver.implicitly_wait(3)
|
|
|
driver.get('https://www.instagram.com/')
|
|
|
time.sleep(5)
|
|
@@ -41,14 +43,10 @@ def download_post():
|
|
|
driver.implicitly_wait(3)
|
|
|
driver.find_element(By.XPATH,'//*[@id="react-root"]/section/main/div/div/div/section/div/button').click()
|
|
|
time.sleep(3)
|
|
|
- cookie = driver.get_cookies()
|
|
|
- jar = RequestsCookieJar()
|
|
|
- for i in cookie:
|
|
|
- jar.set(i['name'], i['value'])
|
|
|
driver.get(url)
|
|
|
time.sleep(3)
|
|
|
- limit_of_posts = 10
|
|
|
- limit_of_scroll = 250
|
|
|
+ limit_of_posts = 1
|
|
|
+ limit_of_scroll = 3
|
|
|
c = 0
|
|
|
c_sroll=0
|
|
|
wait = WebDriverWait(driver, 10)
|
|
@@ -70,10 +68,8 @@ def download_post():
|
|
|
c_sroll+=1
|
|
|
if c > limit_of_posts:
|
|
|
print('中場休息')
|
|
|
- time.sleep(randint(30,60))
|
|
|
+ time.sleep(randint(15,30))
|
|
|
c=0
|
|
|
- # if len(link)>500:
|
|
|
- # break
|
|
|
if c_sroll>limit_of_scroll:
|
|
|
break
|
|
|
html = driver.page_source
|
|
@@ -83,16 +79,16 @@ def download_post():
|
|
|
link.append(elem['href'])
|
|
|
driver.close()
|
|
|
print(len(link))
|
|
|
- return link
|
|
|
-
|
|
|
+ return link, hashtag
|
|
|
|
|
|
|
|
|
def hashtag():
|
|
|
- hashtag = []
|
|
|
+ input_hashtag = 'pilo'
|
|
|
+ collect_hashtag = []
|
|
|
c=0
|
|
|
- result = download_post()
|
|
|
+ result = download_post()[0]
|
|
|
s = Service(path)
|
|
|
- driver = webdriver.Chrome(service=s)
|
|
|
+ driver = webdriver.Chrome(service=s,options=options)
|
|
|
driver.implicitly_wait(3)
|
|
|
driver.get('https://www.instagram.com/')
|
|
|
time.sleep(5)
|
|
@@ -109,23 +105,33 @@ def hashtag():
|
|
|
hashtags = soup.find_all('a', class_='xil3i')
|
|
|
for tag in hashtags:
|
|
|
print(tag.text)
|
|
|
- hashtag.append(tag.text)
|
|
|
+ collect_hashtag.append(tag.text)
|
|
|
c+=1
|
|
|
if c>10:
|
|
|
- time.sleep(3)
|
|
|
+ time.sleep(randint(5,10))
|
|
|
c=0
|
|
|
driver.close()
|
|
|
- print(hashtag)
|
|
|
+ print(collect_hashtag)
|
|
|
db = pymysql.connect(host='localhost',
|
|
|
user='root',
|
|
|
password='jondae350',
|
|
|
database='ig_tags')
|
|
|
cur = db.cursor()
|
|
|
- query = "ALTER TABLE ig_tags ADD health_product TEXT(100)"
|
|
|
+ query_new_col = f"ALTER TABLE ig_tags.new_table ADD COLUMN {input_hashtag} VARCHAR(45) NULL"
|
|
|
+ cur.execute(query_new_col)
|
|
|
id_number = 1
|
|
|
- for i in hashtag:
|
|
|
- cur.execute(f'UPDATE ig_tags set snack={i} where id={id_number}')
|
|
|
- id_number+=1
|
|
|
+ cur.execute("select * from ig_tags.new_table order by id desc limit 1")
|
|
|
+ last_id = cur.fetchall()[0][0]
|
|
|
+ insert_row = len(collect_hashtag)-last_id
|
|
|
+ for i in range(0, insert_row):
|
|
|
+ query_insert = f"INSERT INTO ig_tags.new_table ({input_hashtag}) VALUES ('')"
|
|
|
+ cur.execute(query_insert)
|
|
|
+ for i in collect_hashtag:
|
|
|
+ i = i.replace('#', '')
|
|
|
+ query_update = f"UPDATE ig_tags.new_table SET {input_hashtag}='{i}' where id='{id_number}'"
|
|
|
+ cur.execute(query_update)
|
|
|
+ id_number += 1
|
|
|
+
|
|
|
db.commit()
|
|
|
db.close()
|
|
|
return hashtag
|