|
@@ -16,6 +16,7 @@ import dataset
|
|
|
import traceback
|
|
|
import sys
|
|
|
from selenium.webdriver.common.keys import Keys
|
|
|
+import timeit
|
|
|
|
|
|
add_tabs = [7,9,11,13,15,7,9,11,13,15,7,9,11,13,15,7,9,11,13,15]
|
|
|
|
|
@@ -39,7 +40,6 @@ def empty_query(q):
|
|
|
driver.get(googleurl)
|
|
|
time.sleep(3)
|
|
|
|
|
|
-
|
|
|
def process_query(domain, target_domain, brands, query):
|
|
|
print(query)
|
|
|
sleepoffset = 0
|
|
@@ -64,6 +64,9 @@ def process_query(domain, target_domain, brands, query):
|
|
|
googleurl = driver.current_url
|
|
|
print(driver.current_url)
|
|
|
|
|
|
+ if "sorry" in googleurl:
|
|
|
+ return 444
|
|
|
+
|
|
|
elmts=driver.find_elements("xpath","//div[@class='yuRUbf']/a")
|
|
|
|
|
|
print (len(elmts))
|
|
@@ -72,6 +75,7 @@ def process_query(domain, target_domain, brands, query):
|
|
|
for el in elmts:
|
|
|
href=el.get_attribute('href')
|
|
|
txt=el.text
|
|
|
+ print(href)
|
|
|
if len(txt)>10:
|
|
|
if domain in href:
|
|
|
domain_in_link += 1
|
|
@@ -85,7 +89,7 @@ def process_query(domain, target_domain, brands, query):
|
|
|
new_windows_count = add_tabs[random.randint(0,19)]
|
|
|
print(str(new_windows_count) + " new tabs")
|
|
|
for i in range (0,new_windows_count):
|
|
|
- print("Tab" + str(i+1))
|
|
|
+ print("Tab " + str(i+1))
|
|
|
#original_window = driver.current_window_handle
|
|
|
#driver.switch_to.new_window('window')
|
|
|
#driver.get(href)
|
|
@@ -111,6 +115,7 @@ def process_query(domain, target_domain, brands, query):
|
|
|
|
|
|
|
|
|
print(domain_in_link)
|
|
|
+ return 200
|
|
|
|
|
|
|
|
|
def run_once(domain, target_domain, brands, query):
|
|
@@ -129,20 +134,32 @@ def run_once(domain, target_domain, brands, query):
|
|
|
driver.delete_all_cookies()
|
|
|
driver.set_window_size(1400,1000)
|
|
|
|
|
|
- process_query(domain, target_domain, brands, query)
|
|
|
- time.sleep(3)
|
|
|
+ statuscode = process_query(domain, target_domain, brands, query)
|
|
|
driver.quit()
|
|
|
|
|
|
+ return statuscode
|
|
|
+
|
|
|
#execution starts here
|
|
|
|
|
|
def execute(domain, target_domain, brands, query_list):
|
|
|
while True:
|
|
|
+ print("Ctrl+C or Ctrl+Z to stop.")
|
|
|
+ st = timeit.default_timer()
|
|
|
try:
|
|
|
- run_once(domain, target_domain, brands, random.choice(query_list))
|
|
|
+ statuscode = run_once(domain, target_domain, brands, random.choice(query_list))
|
|
|
except:
|
|
|
traceback.print_exc()
|
|
|
- sleepint = random.randint(75,90)
|
|
|
- sleepint = sleepint - sleepoffset
|
|
|
- print("Completed (" + str(sleepint) + ")")
|
|
|
- if sleepint > 0:
|
|
|
- time.sleep(sleepint)
|
|
|
+ timetaken = timeit.default_timer()-st
|
|
|
+ print("Time taken: " + str(timetaken))
|
|
|
+
|
|
|
+ print("Process returned with " + str(statuscode))
|
|
|
+ if statuscode == 444:
|
|
|
+ print("You have been caught!!! Program terminating.")
|
|
|
+ break
|
|
|
+
|
|
|
+ extrasleep = 0
|
|
|
+ if(timetaken < 70):
|
|
|
+ extrasleep = 70 - timetaken
|
|
|
+ print("Ctrl+C or Ctrl+Z to stop now.")
|
|
|
+ print("You have " + str(10 + extrasleep) + " seconds.")
|
|
|
+ time.sleep(10 + extrasleep)
|