|
@@ -85,10 +85,17 @@ def process_query(urllist, query, client):
|
|
|
if len(txt)>10:
|
|
|
for url in urllist:
|
|
|
if url in href:
|
|
|
- clickcand.append(el)
|
|
|
+ clickcand.append([el,n])
|
|
|
|
|
|
if len(clickcand)!=0:
|
|
|
- el = random.choice(clickcand)
|
|
|
+ '''for e in clickcand:
|
|
|
+ href = e[0].get_attribute('href')
|
|
|
+ print(href)
|
|
|
+ print(e[0].text)
|
|
|
+ print("Rank: " + str(e[1]))
|
|
|
+ db['sns_log'].insert({"kw": query, "ranking": e[1], "url": href, "dt": dt.now(), "client": client, "title": e[0].text, "results": n})'''
|
|
|
+
|
|
|
+ el = random.choice(clickcand)[0]
|
|
|
domain_in_link += 1
|
|
|
print('clicked....')
|
|
|
href = el.get_attribute('href')
|
|
@@ -97,15 +104,13 @@ def process_query(urllist, query, client):
|
|
|
|
|
|
webdriver.ActionChains(driver).move_to_element(el).perform()
|
|
|
webdriver.ActionChains(driver).move_to_element(el).click().perform()
|
|
|
- print("Rank: " + str(n))
|
|
|
- db['sns_log'].insert({"kw": query, "ranking": n, "url": href, "dt": dt.now(), "client": client, "title": el.text})
|
|
|
duration = random.randint(40,60)
|
|
|
time.sleep(duration)
|
|
|
|
|
|
print(domain_in_link)
|
|
|
return 200
|
|
|
|
|
|
- return 0 # if not found and all articles in list exhausted
|
|
|
+ return 0 # if no articles found
|
|
|
|
|
|
|
|
|
|