Ver código fonte

Merge branch 'master' of http://git.choozmo.com:3000/choozmo/kw_tools

ming 2 anos atrás
pai
commit
a10d1d3101
100 arquivos alterados com 6105 adições e 37 exclusões
  1. 27 0
      .favorites.json
  2. 2 4
      .google-cookie
  3. 22 0
      .vscode/workbench.json
  4. 30 0
      INNNews/commerce.py
  5. 6 0
      INNNews/dbtest.py
  6. 146 0
      INNNews/ga_daily_test.py
  7. 135 0
      INNNews/general_clickbot.py
  8. 121 0
      INNNews/general_remote_click.py
  9. 61 0
      INNNews/get_spread.py
  10. 95 0
      INNNews/local_1777.py
  11. 131 0
      INNNews/local_general_clickbot.py
  12. 53 0
      INNNews/notifytest.py
  13. 77 0
      INNNews/notifytest2.py
  14. 53 0
      INNNews/routine_click.py
  15. 70 0
      INNNews/run_jared.py
  16. 39 0
      INNNews/run_sheet.py
  17. 86 0
      INNNews/run_sheet_2.py
  18. 60 0
      INNNews/run_sns.py
  19. 113 0
      INNNews/sns_clickbot.py
  20. 43 0
      INNNews/spread_test.py
  21. 109 0
      SEO/click_and_save.py
  22. 13 7
      SEO/clickbot_100.py
  23. 96 0
      SEO/crawl_web.py
  24. 97 0
      SEO/exp_100.py
  25. 53 0
      SEO/fb_comment.py
  26. 102 0
      SEO/fetch__url_content.py
  27. 108 0
      SEO/general_clickjob.py
  28. 141 0
      SEO/general_gsrack.py
  29. 76 0
      SEO/gnews_click.py
  30. 10 0
      SEO/gsc-save-credential.py
  31. 38 0
      SEO/month_kw_rank.py
  32. 117 0
      SEO/new_100.py
  33. 114 0
      SEO/pet100.py
  34. 149 0
      SEO/ranking_day.py
  35. 139 0
      SEO/ranking_day2.py
  36. 64 0
      SEO/run_gsrack.py
  37. 58 0
      SEO/run_ranking_day.py
  38. 8 0
      SEO/seo_notify.py
  39. 128 0
      SEO/tiny_click100.py
  40. 130 0
      SEO關聯圖_網址關鍵字.html
  41. BIN
      choozmo/__pycache__/gsearch_general.cpython-310.pyc
  42. BIN
      choozmo/__pycache__/gsearch_general.cpython-39.pyc
  43. 108 0
      choozmo/click_commerce.py
  44. 8 0
      choozmo/cryptotest.py
  45. 43 0
      choozmo/custom_profile_test.py
  46. 1 0
      choozmo/db_clickjob.py
  47. 101 0
      choozmo/fb_get_email.py
  48. 21 0
      choozmo/fb_gsearch.py
  49. 42 0
      choozmo/fetch_content.py
  50. 164 0
      choozmo/fetch_serp.py
  51. 17 0
      choozmo/gen_counter_db.py
  52. 356 0
      choozmo/gen_seo.py
  53. 72 0
      choozmo/gmailtest.py
  54. 20 13
      choozmo/googlenews.py
  55. 110 0
      choozmo/gsc_tree.py
  56. 110 0
      choozmo/gsc_tree2.py
  57. 112 0
      choozmo/gsc_tree3.py
  58. 5 4
      choozmo/gsearch_general.py
  59. 3 0
      choozmo/gsearch_libtest.py
  60. 11 4
      choozmo/igool/igtree.py
  61. 16 0
      choozmo/imp_selected.py
  62. 34 0
      choozmo/imp_ytlist.py
  63. 91 0
      choozmo/linkedin_detail.py
  64. 19 0
      choozmo/mail_list_imp_csv.py
  65. 112 0
      choozmo/meta_1777.py
  66. 12 0
      choozmo/phantomtest.py
  67. 319 0
      choozmo/priv_seo.py
  68. 7 0
      choozmo/privseo.bat
  69. 87 0
      choozmo/profile_selenium.py
  70. 42 0
      choozmo/proxytest.py
  71. 15 0
      choozmo/redis_test.py
  72. 52 0
      choozmo/save_search_result.py
  73. 104 0
      choozmo/seo_hhh.py
  74. 50 0
      choozmo/seo_routine.py
  75. 158 0
      choozmo/serp_searchhome.py
  76. 22 5
      choozmo/term_get_email.py
  77. 10 0
      choozmo/term_gsearch.py
  78. 11 0
      choozmo/test.py
  79. 121 0
      choozmo/watch_yt.py
  80. 7 0
      choozmo/yt1.sh
  81. 7 0
      choozmo/yt2.sh
  82. 8 0
      choozmo/yt3.sh
  83. 7 0
      choozmo/yt4.sh
  84. 147 0
      click_and_notify/click_and_not.py
  85. 8 0
      console/google_status.py
  86. 24 0
      console/hhh_start_process.py
  87. 10 0
      console/restart_seo_tiny_docker.py
  88. 15 0
      docker/alpine-basic/Dockerfile
  89. 3 0
      docker/alpine-basic/script.txt
  90. 23 0
      docker/alpine-chrome/Dockerfile
  91. 3 0
      docker/alpine-chrome/script.txt
  92. 4 0
      docker/alpine-hhh/Dockerfile
  93. 8 0
      docker/alpine-poi/Dockerfile
  94. 6 0
      docker/alpine-poi/script.txt
  95. 3 0
      docker/alpine-seo/Dockerfile
  96. 3 0
      docker/alpine-test/Dockerfile
  97. 27 0
      docker/chrome99-test/Dockerfile
  98. 63 0
      docker/gat/package.json
  99. 19 0
      docker/headless-clickbot/Dockerfile
  100. 4 0
      docker/hhh-backstage-docker/Dockerfile

+ 27 - 0
.favorites.json

@@ -16,5 +16,32 @@
         "workspaceRoot": null,
         "workspacePath": null,
         "id": "uQeJKy35vHfLxa6P"
+    },
+    {
+        "type": "File",
+        "name": "c:\\gitlab\\farms\\202106\\remote_yt_publish.py",
+        "parent_id": null,
+        "fsPath": "c:\\gitlab\\farms\\202106\\remote_yt_publish.py",
+        "workspaceRoot": null,
+        "workspacePath": null,
+        "id": "kgRhBXxnf3btymE1"
+    },
+    {
+        "type": "File",
+        "name": "c:\\gitlab\\farms\\0_projs\\projs.md",
+        "parent_id": null,
+        "fsPath": "c:\\gitlab\\farms\\0_projs\\projs.md",
+        "workspaceRoot": null,
+        "workspacePath": null,
+        "id": "BeR9IWGd4mnAxbfX"
+    },
+    {
+        "type": "File",
+        "name": "c:\\gitlab\\farms\\202106\\gen_yt_from_gsc.py",
+        "parent_id": null,
+        "fsPath": "c:\\gitlab\\farms\\202106\\gen_yt_from_gsc.py",
+        "workspaceRoot": null,
+        "workspacePath": null,
+        "id": "AErchNbdxYf1iQz3"
     }
 ]

+ 2 - 4
.google-cookie

@@ -1,5 +1,3 @@
 #LWP-Cookies-2.0
-Set-Cookie3: 1P_JAR="2021-07-11-15"; path="/"; domain=".google.com"; path_spec; domain_dot; secure; expires="2021-08-10 15:42:07Z"; version=0
-Set-Cookie3: NID="218=O1CBm20lYnsV_ltrg-nL5DcUJNUBwoxWC6DN7AbiPyr8VZB_46f0DsJSxUCrCSpto6xERzmwwCV7FJ_m8V9KdAG6FSCEKeTG4ohk26LTz9-2nZZm8ktguyafTFc7fkVSqM8fubxeV3gAmvBML_TbOFXlF-Jn2nkLTBqo5j15EgQ"; path="/"; domain=".google.com"; path_spec; domain_dot; expires="2022-01-10 15:24:06Z"; HttpOnly=None; version=0
-Set-Cookie3: CGIC=""; path="/complete/search"; domain=".google.com"; path_spec; domain_dot; expires="2022-01-07 15:24:06Z"; HttpOnly=None; version=0
-Set-Cookie3: CGIC=""; path="/search"; domain=".google.com"; path_spec; domain_dot; expires="2022-01-07 15:24:06Z"; HttpOnly=None; version=0
+Set-Cookie3: 1P_JAR="2022-03-05-12"; path="/"; domain=".google.com"; path_spec; domain_dot; secure; expires="2022-04-04 12:37:25Z"; version=0
+Set-Cookie3: NID="511=jmFnFRJVAylFKm1tJEgc_m5zaMSlCb63QuX4aANXOz5cgS7C5dSXhqTN9abkS4qLLsAx1igUAb3FvCJFmO3ewVBVYup-OmV__TpyJ7a2Aav1aA0nKuybwsi6gQqyOkbWqmLJsc5aariaE_827PoCwVZHpAt9CB7g2sKO8UJ1o_U"; path="/"; domain=".google.com"; path_spec; domain_dot; expires="2022-09-04 12:37:25Z"; HttpOnly=None; version=0

+ 22 - 0
.vscode/workbench.json

@@ -0,0 +1,22 @@
+{
+	"currentWorkbench": {
+		"files": [
+			{
+				"path": "kw_tools/choozmo/gsearch_general.py",
+				"alias": " 1 choozmo/gsearch_general.py"
+			},
+			{
+				"path": "kw_tools/choozmo/proxytest.py",
+				"alias": " 2 choozmo/proxytest.py"
+			},
+			{
+				"path": "kw_tools/hhh/yt/yt_list.py",
+				"alias": " 3 yt/yt_list.py"
+			},
+			{
+				"path": "kw_tools/hhh/yt_dl.py",
+				"alias": " 4 hhh/yt_dl.py"
+			}
+		]
+	}
+}

+ 30 - 0
INNNews/commerce.py

@@ -0,0 +1,30 @@
+import sys
+import codecs
+import traceback
+import requests
+import re
+import pandas as pd
+import random
+import urllib
+import json
+import gspread
+import datetime
+from gspread_pandas import Spread, Client
+from oauth2client.service_account import ServiceAccountCredentials
+import os
+import redis
+import dataset
+
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+domain='beastparadise.net'
+cursor=db.query('SELECT term FROM seo.selected_kw where domain = "'+domain+'"  order by rand()')
+kw={}
+for c in cursor:
+    kw['kw']=c['term']
+print(kw['kw'])
+if os.name == 'nt':
+    py='python'
+else:
+    py='python3'
+
+os.system(py+' general_clickbot.py get --kw="'+kw['kw']+'" --domain='+domain)

+ 6 - 0
INNNews/dbtest.py

@@ -0,0 +1,6 @@
+import dataset
+db = dataset.connect('mysql://johnny:pAsJohnny_2022@db.ptt.cx:3306/google_poi?charset=utf8mb4')
+cursor=db.query('SELECT count(*) as cnt FROM google_poi.shop_list3')
+for c in cursor:
+    print(c['cnt'])
+

+ 146 - 0
INNNews/ga_daily_test.py

@@ -0,0 +1,146 @@
+#!/usr/bin/python3
+import sys
+import codecs
+import traceback
+import requests
+import re
+import pandas as pd
+import random
+import urllib
+import json
+import gspread
+import datetime
+from gspread_pandas import Spread, Client
+from oauth2client.service_account import ServiceAccountCredentials
+import os
+import threading
+from googleapiclient.discovery import build
+from oauth2client.service_account import ServiceAccountCredentials
+import dataset
+from datetime import datetime
+
+
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/hhh?charset=utf8mb4')
+db.query('delete from ga_pagepath')
+db.begin()
+
+
+SCOPES = ['https://www.googleapis.com/auth/analytics.readonly']
+KEY_FILE_LOCATION = '/root/src/kw_tools/monitor/choozmo-ga-beee24b7a4c1.json' #'c:\\keys\\choozmo-ga-beee24b7a4c1.json'
+VIEW_ID = '188916214'
+
+# line notify header
+headers = {
+        "Authorization": "Bearer " + "WekCRfnAirSiSxALiD6gcm0B56EejsoK89zFbIaiZQD",
+        "Content-Type": "application/x-www-form-urlencoded"
+}
+
+com_table = []
+
+def send_msg(kw):
+  # line notify send message
+    current_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S') #現在時間
+    hour = datetime.now().strftime('%H') 
+    # 判斷是否達標
+    complet="否"
+    for i in range(0,25):
+      if int(hour)+1==com_table[i][0]:
+        print(i)
+        if int(kw) > com_table[i][1] : 
+          complet="是"
+      elif int(hour) == 24:
+        if int(kw) > 70000 : 
+          complet="是"
+    params = {"message": "\n現在時間: " + current_time + "\n當前pageViews: "+kw + "\n是否達標: " + complet}  
+    print(params)
+    r = requests.post("https://notify-api.line.me/api/notify",headers=headers, params=params)
+    print(r.text)
+
+
+def initialize_analyticsreporting():
+  """Initializes an Analytics Reporting API V4 service object.
+
+  Returns:
+    An authorized Analytics Reporting API V4 service object.
+  """
+  credentials = ServiceAccountCredentials.from_json_keyfile_name(
+      KEY_FILE_LOCATION, SCOPES)
+
+  # Build the service object.
+  analytics = build('analyticsreporting', 'v4', credentials=credentials)
+
+  return analytics
+
+
+def get_report(analytics,body):
+  """Queries the Analytics Reporting API V4.
+
+  Args:
+    analytics: An authorized Analytics Reporting API V4 service object.
+  Returns:
+    The Analytics Reporting API V4 response.
+  """
+  return analytics.reports().batchGet(
+      body={
+        'reportRequests':body
+      }
+  ).execute()
+
+
+def print_response(response):
+  """Parses and prints the Analytics Reporting API V4 response.
+
+  Args:
+    response: An Analytics Reporting API V4 response.
+  """
+  result=[]
+  for report in response.get('reports', []):
+    columnHeader = report.get('columnHeader', {})
+    dimensionHeaders = columnHeader.get('dimensions', [])
+    metricHeaders = columnHeader.get('metricHeader', {}).get('metricHeaderEntries', [])
+
+    for row in report.get('data', {}).get('rows', []):
+      dimensions = row.get('dimensions', [])
+      dateRangeValues = row.get('metrics', [])
+      ga_dict={}
+
+      for header, dimension in zip(dimensionHeaders, dimensions):
+#        print(header + ': ', dimension)
+        ga_dict[header]=dimension
+      
+      for i, values in enumerate(dateRangeValues):
+#        print('Date range:', str(i))
+        for metricHeader, value in zip(metricHeaders, values.get('values')):
+          ga_dict[metricHeader.get('name')]=value
+#          print(metricHeader.get('name') + ':', value)
+        result.append(ga_dict)
+    return result
+#        print(ga_dict)
+
+def main():
+  analytics = initialize_analyticsreporting()
+#(FB_|facebook|IG_|LINE_|LINEMP_|qsear.ch)
+  current_time = datetime.now().strftime('%Y-%m-%d') #現在時間
+  body=[{ 'viewId': VIEW_ID,
+  'dateRanges': [{'startDate': current_time, 'endDate': current_time}],
+
+  'metrics': [{'expression': 'ga:users'},{'expression': 'ga:newusers'},{'expression': 'ga:sessions'},{'expression': 'ga:pageviews'},{'expression': 'ga:bounceRate'},{'expression': 'ga:pageviewsPerSession'}],
+#  'dimensions': [{'name': 'ga:pagePath'}],
+#  'orderBys':[{"fieldName": "ga:pageviews", "sortOrder": "DESCENDING"}],
+  'pageSize': '100'
+  }]
+
+  response = get_report(analytics,body)
+  ga_dict=print_response(response)
+
+  result=[]
+  for elmt in ga_dict:
+    print(elmt)
+    hour = datetime.now().strftime('%H')
+    if int(hour)+1 > 8 :
+      send_msg(elmt['ga:pageviews'])
+  print('inserting.....')
+
+if __name__ == '__main__':
+#  creat_table()
+  main()

+ 135 - 0
INNNews/general_clickbot.py

@@ -0,0 +1,135 @@
+import time
+import json
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+import os
+import urllib.parse
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.chrome.service import Service
+from selenium.webdriver.support import expected_conditions as EC
+import codecs
+import random
+import requests
+import datetime
+import dataset
+import time
+import traceback
+import sys
+import fire
+#import pymysql
+#pymysql.install_as_MySQLdb()
+
+
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+table=db['general_log']
+driver = None
+
+
+def rua():
+    pool = [
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:73.0) Gecko/20100101 Firefox/73.0",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 OPR/68.0.3618.125",
+    ]
+    return random.choice(pool)
+
+
+def empty_query(q):
+    global driver
+    googleurl='https://www.google.com/search?q='+urllib.parse.quote(q)
+    driver.get(googleurl)
+    time.sleep(3)
+
+
+def process_query(qs):
+    q=qs[0]
+    domain=qs[1]
+    global driver
+    googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(urllib.parse.quote(q), 100,'zh-TW')
+    print(googleurl)
+    driver.get(googleurl)
+    time.sleep(6)
+
+    elmts=driver.find_elements_by_xpath("//div[@class='yuRUbf']/a")
+
+    idx=1
+    ranking=-1
+    print(len(elmts))
+#    driver.save_screenshot('c:/tmp/test.png')
+    if 'site' in q:
+        href = elmts[0].get_attribute('href')
+        txt = elmts[0].text
+        print('clicked....')
+        print(href)
+        print(txt)
+        print("ranking", idx)
+        table.insert(
+            {'kw': q, 'domain': domain, 'ranking': idx, 'title': txt, 'url': href, 'dt': datetime.datetime.now()})
+        webdriver.ActionChains(driver).move_to_element(elmts[0]).perform()
+        webdriver.ActionChains(driver).move_to_element(elmts[0]).click().perform()
+        time.sleep(5)
+    for elmt in elmts:
+        href=elmt.get_attribute('href')
+        txt=elmt.text
+        if len(txt)>10:
+            if domain in href:
+                print('clicked....')
+                print(href)
+                print(txt)
+                print("ranking", idx)
+                table.insert({'kw':q,'domain':domain,'ranking':idx,'title':txt,'url':href,'dt':datetime.datetime.now()})
+                webdriver.ActionChains(driver).move_to_element(elmt).perform()
+                webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+                time.sleep(5)
+                break
+        idx+=1
+
+def run_once(q):
+    global driver
+    result=[]
+    s = Service('/root/driver/chromedriver')
+    user_agent = rua()
+    options = webdriver.ChromeOptions()
+    options.add_argument('--headless')
+    options.add_argument('--remote-debugging-port=9222')
+    options.add_experimental_option("debuggerAddress", f"127.0.0.1:{q[2]}")
+    options.add_argument("--user-agent=" +user_agent)
+    options.add_argument("--incognito")
+
+    driver = webdriver.Chrome(
+    options=options,service=s)
+
+    driver.delete_all_cookies()
+    driver.set_window_size(1400,1000)
+
+    print('到此')
+    process_query(q)
+    time.sleep(3)
+    driver.quit()
+
+
+#for c in lst:
+#while True:
+#    try:
+#        c=random.choice(lst)
+#    except:
+#        traceback.print_exc()
+#    sleepint=random.randint(320,520)
+#    time.sleep(sleepint)
+
+class JParams(object):
+
+  def get(self, kw,domain,port):
+    print('關鍵字',kw)
+    run_once( (kw,domain,port)   )
+
+
+if __name__ == '__main__':
+  fire.Fire(JParams)
+

+ 121 - 0
INNNews/general_remote_click.py

@@ -0,0 +1,121 @@
+import time
+import json
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+import os
+import urllib.parse
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+import codecs
+import random
+import requests
+import datetime
+import dataset
+import time
+import traceback
+import sys
+import fire
+#from INNNews import notifytest
+
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+table=db['general_log']
+
+
+  
+driver=None
+headers = {
+        "Authorization": "Bearer " + "6SDULL1Ebklduc6TFxa97AFto5Sj21kyJ30CxiLiSoi",
+        "Content-Type": "application/x-www-form-urlencoded"
+}
+
+
+
+def send_msg(kw):
+    params = {"message": "error: "+kw}  
+    r = requests.post("https://notify-api.line.me/api/notify",headers=headers, params=params)
+
+
+def empty_query(q):
+    global driver
+    googleurl='https://www.google.com/search?q='+urllib.parse.quote(q)
+    driver.get(googleurl)
+    time.sleep(3)
+
+
+def process_query(qs):
+    q=qs[0]
+    domain=qs[1]
+    global driver
+    googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(urllib.parse.quote(q), 100,'zh-TW')
+    print(googleurl)
+    driver.get(googleurl)
+    time.sleep(6)
+
+    elmts=driver.find_elements(By.XPATH,"//div[@class='yuRUbf']/a")
+
+    idx=1
+    ranking=-1
+    print(len(elmts))
+#    driver.save_screenshot('c:/tmp/test.png')
+
+    for elmt in elmts:
+
+        href=elmt.get_attribute('href')
+        txt=elmt.text
+        if len(txt)>10:
+            if domain in href:
+                print('clicked....')
+                print(href)
+                print(txt)
+                print("ranking", idx)
+                table.insert({'kw':q,'domain':domain,'ranking':idx,'title':txt,'url':href,'dt':datetime.datetime.now()})
+                webdriver.ActionChains(driver).move_to_element(elmt).perform()
+                webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+                break
+        idx+=1
+
+def run_once(q):
+    global driver
+    result=[]
+    options = webdriver.ChromeOptions()
+    options.add_argument('--headless')
+
+    options.add_argument('--disable-dev-shm-usage')
+    options.add_argument('--no-sandbox')
+#    options.add_argument('--remote-debugging-port=9222')
+
+    epath=os.environ['WEBDRIVER']
+
+#    options.add_experimental_option("debuggerAddress", "192.168.192.45:9922")
+    options.add_experimental_option("debuggerAddress", q[2])
+
+#    options.add_argument("--user-agent=" +user_agent)
+    options.add_argument("--incognito")
+#    driver = webdriver.Chrome(executable_path=r'C:\portable\webdriver\chrome98\chromedriver.exe',options=options)
+    driver = webdriver.Chrome(executable_path=epath,options=options)
+
+    driver.delete_all_cookies()
+    driver.set_window_size(1400,1000)
+
+    print(q)
+    process_query(q)
+    time.sleep(3)
+    driver.quit()
+
+class JParams(object):
+
+  def get(self, kw,domain,address):
+    try:
+        print(kw)
+        print(domain)
+        run_once( (kw,domain,address)   )
+    except:
+        traceback.print_exc()
+        send_msg('SEO docker exception... stop')
+
+
+if __name__ == '__main__':
+  fire.Fire(JParams)
+

+ 61 - 0
INNNews/get_spread.py

@@ -0,0 +1,61 @@
+#!/usr/bin/python3
+import sys
+import codecs
+import traceback
+import redis
+import requests
+import re
+import pandas as pd
+import random
+import urllib
+import json
+import gspread
+import datetime
+from gspread_pandas import Spread, Client
+from oauth2client.service_account import ServiceAccountCredentials
+import os
+from itertools import islice
+
+def save_sheet(df,filename,tabname,startpos='A1'):
+
+    scope = ['https://spreadsheets.google.com/feeds',
+            'https://www.googleapis.com/auth/drive']
+
+    credentials = ServiceAccountCredentials.from_json_keyfile_name('/Users/zooeytsai/kw_tools/INNNews/innnews-d27b01472ba2.json', scope)
+    gc = gspread.authorize(credentials)
+    spread = Spread(filename,creds=credentials)
+
+    spread.df_to_sheet(df, index=False, sheet=tabname, start=startpos, replace=False)
+
+def get_sheets(filename):
+    result=[]
+    scope = ['https://spreadsheets.google.com/feeds',
+            'https://www.googleapis.com/auth/drive']
+
+    credentials = ServiceAccountCredentials.from_json_keyfile_name('/Users/zooeytsai/kw_tools/INNNews/innnews-d27b01472ba2.json', scope)
+    gc = gspread.authorize(credentials)
+#    files=gc.list_spreadsheet_files()
+#    print(files)
+    spread = Spread(filename,creds=credentials)
+
+    df=spread.sheet_to_df(sheet='2022')
+    for idx, row in islice(df.iterrows(), 1, None):
+        r1=row[1].strip()
+        if len(r1)>1:
+            r2=row[2].strip()
+            r3=row[3].strip()
+            if "病假" not in r1:
+                result.append({'kw':r1+" "+r2+" "+r3})
+            else:
+                pass
+    return result
+
+result=get_sheets('引新聞INNEWS稿單xSEO優化')
+js=json.dumps(result, ensure_ascii=False)
+
+print(js)
+# r = redis.Redis(host='db.ptt.cx', port=6379, db=1,password='choozmo9')
+# r.set('innews_sheet',js)
+
+#    spread.df_to_sheet(df, index=False, sheet=tabname, start=startpos, replace=False)
+#

+ 95 - 0
INNNews/local_1777.py

@@ -0,0 +1,95 @@
+#import redis
+import time
+import traceback
+#import json
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+import os
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+import dataset
+import json
+import random
+import time
+import sys
+import codecs
+import random
+import os
+import time
+from userAgentRandomizer import userAgents
+
+driver=None
+
+def re_get_webdriver():
+    global driver
+    result=[]
+    if driver is not None:
+        print('closing....')
+        driver.quit()
+        os.system('killall chrome')
+        print('quit....')
+        driver=None
+    try:
+        ua = userAgents()
+
+        user_agent = ua.random()        
+
+        options = webdriver.ChromeOptions()
+        options.add_argument("--no-sandbox")
+        options.add_argument("--disable-dev-shm-usage")
+        options.add_argument("--headless")
+
+        print(user_agent)
+        options.add_argument("--user-agent=" +user_agent)
+        options.add_argument("--incognito")
+        driver=None
+        try:
+            driver = webdriver.Chrome(options=options)
+        except:
+            traceback.print_exc()
+#            driver.quit()
+#            os.system('pkill -f ')
+            os.system('kill %d' % os.getpid())
+            sys.exit()
+            return
+        driver.set_window_size(1400,1000)
+        return
+    except:
+        import traceback
+        traceback.print_exc()
+        driver=None
+        return None
+
+def run_once(url):
+    global driver
+    i=random.randint(0,7)
+    if i==0 or driver is None:
+        time.sleep(8)
+        re_get_webdriver()
+    if driver is None:
+        return
+    try:
+        driver.execute_script('window.open("'+url+'","_blank");')
+        driver.execute_script("window.scrollTo(0, window.scrollY + 400)")
+        time.sleep(0.5)
+    except:
+        print('exception')
+
+
+lst=[]
+lst.append('https://innews.com.tw/62183/')
+lst.append('https://innews.com.tw/48338/')
+lst.append('https://innews.com.tw/62326/')
+lst.append('https://innews.com.tw/38246/')
+lst.append('https://innews.com.tw/24843/')
+
+#lst=['https://www.hhh.com.tw/columns/detail/3427/index.php']
+    #for i in range(20):
+#while True:
+for i in range(500):
+    l=random.choice(lst)
+    print(l)
+    run_once(l)
+

+ 131 - 0
INNNews/local_general_clickbot.py

@@ -0,0 +1,131 @@
+import time
+import json
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+import os
+import urllib.parse
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.chrome.service import Service
+from selenium.webdriver.support import expected_conditions as EC
+import codecs
+import random
+import requests
+import datetime
+import dataset
+import time
+import traceback
+import sys
+import fire
+
+
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+table=db['general_log']
+
+
+  
+driver = None
+
+
+
+def empty_query(q):
+    global driver
+    googleurl='https://www.google.com/search?q='+urllib.parse.quote(q)
+    driver.get(googleurl)
+    time.sleep(3)
+
+
+def process_query(qs):
+    q=qs[0]
+    domain=qs[1]
+    global driver
+    googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(urllib.parse.quote(q), 100,'zh-TW')
+    print(googleurl)
+    driver.get(googleurl)
+    time.sleep(6)
+#    driver.save_screenshot('c:/tmp/test.png')
+
+    elmts=driver.find_elements(By.XPATH,"//div[@class='yuRUbf']/a")
+
+    idx=1
+    ranking=-1
+    print(len(elmts))
+#    if len(elmts) <=0:
+#    driver.save_screenshot('c:/tmp/test.png')
+    clicked=False
+    for elmt in elmts:
+
+        href=elmt.get_attribute('href')
+        txt=elmt.text
+        if len(txt)>10:
+            if domain in href:
+                print('clicked....')
+                print(href)
+                print(txt)
+                print("ranking", idx)
+#                table.insert({'kw':q,'domain':domain,'ranking':idx,'title':txt,'url':href,'dt':datetime.datetime.now()})
+                clicked=True
+                webdriver.ActionChains(driver).move_to_element(elmt).perform()
+                webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+                time.sleep(5)
+                break
+        idx+=1
+    if not clicked:
+        print('quit')
+        driver.quit()
+        os.exit(-1)
+
+
+def run_once(q):
+    global driver
+    result=[]
+    options = webdriver.ChromeOptions()
+    options.add_argument('--headless')
+#    options.add_argument('--remote-debugging-port=9922')
+    options.add_argument('--remote-debugging-port='+str(q[2]))
+
+#    options.add_experimental_option("debuggerAddress", "127.00:9922")
+    # options.add_argument("--user-agent=" +user_agent)
+    options.add_argument("--incognito")
+#    driver = webdriver.Chrome(executable_path=r'C:\portable\webdriver\chrome98\chromedriver.exe',options=options)
+    if os.name=='nt':
+        driver = webdriver.Chrome(executable_path=r'C:\portable\webdriver\chrome98\chromedriver.exe',options=options)
+    else:
+        driver = webdriver.Chrome(executable_path='/opt/webdriver/98/chromedriver',options=options)
+
+    driver.delete_all_cookies()
+    driver.set_window_size(1400,1000)
+
+    print('到此')
+    process_query(q)
+    time.sleep(3)
+    driver.quit()
+
+
+#for c in lst:
+#while True:
+#    try:
+#        c=random.choice(lst)
+#    except:
+#        traceback.print_exc()
+#    sleepint=random.randint(320,520)
+#    time.sleep(sleepint)
+
+class JParams(object):
+
+  def get(self, kw,domain,port):
+    print(kw)
+    print(domain)
+    try:
+        run_once( (kw,domain,port)   )
+    except:
+        print('exception, restarting.....')
+        return -1
+
+#        os.system('docker container restart tiny1')
+#        time.sleep(10)
+
+if __name__ == '__main__':
+  val=fire.Fire(JParams)
+

+ 53 - 0
INNNews/notifytest.py

@@ -0,0 +1,53 @@
+import redis
+import time
+import json
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+import os
+import urllib.parse
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+import codecs
+import random
+from bs4 import BeautifulSoup
+import requests
+import dataset
+import time
+import rpyc
+import sys
+import docker
+import pymysql
+pymysql.install_as_MySQLdb()
+from monitor import GA_innnews
+
+headers = {
+        "Authorization": "Bearer " + "zsaS4I79fkbun9LLF8mQmeHL4PlEtBtN7OLzTp8m72Y",
+        "Content-Type": "application/x-www-form-urlencoded"
+}
+
+
+
+def send_msg(kw):
+    send_message = GA_innnews.main()
+    params = {"message": kw+send_message['message']}
+    print('通知結果',params)
+    r = requests.post("https://notify-api.line.me/api/notify",headers=headers, params=params)
+
+while True:
+    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+    cursor=db.query('SELECT * FROM seo.general_log where domain = "innews.com.tw" and datediff(now(),dt)=0 order by dt desc')
+    result="\n"
+    idx=0
+    for c in cursor:
+        idx+=1
+        if idx<=3:
+            elmt=c['kw']
+            result+="["+str(c['ranking'])+"]:"+elmt+"\n"
+    
+    msg="\n機器人執行次數:"+str(idx)
+    send_msg(msg+result)
+    time.sleep(1800)
+
+# schtasks /create /sc minute /mo 30 /sd 2022/05/050 /st 9:00 /et 23:00 /tn "linebat" /tr "C:\tmp\inn_line.bat"

+ 77 - 0
INNNews/notifytest2.py

@@ -0,0 +1,77 @@
+import redis
+import time
+import json
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+import os
+import urllib.parse
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+import codecs
+import random
+from bs4 import BeautifulSoup
+import requests
+import dataset
+import time
+import rpyc
+import sys
+import docker
+
+headers = {
+        "Authorization": "Bearer " + "eEVYaouu4zJUWdfCwRn8e0G9bnsbemmUNnIJY8LL1Lw",
+        "Content-Type": "application/x-www-form-urlencoded"
+}
+
+
+import subprocess
+
+batcmd="dir"
+
+import rpyc
+import os
+conn = rpyc.classic.connect("192.168.192.139",18812)
+conn.execute('import subprocess')
+rsub = conn.modules.subprocess
+result = rsub.check_output('docker ps |grep alpine-poi | wc -l', shell=True)
+alston1="alston1:"+result.decode('utf-8').strip()
+
+conn = rpyc.classic.connect("192.168.192.43",18812)
+conn.execute('import subprocess')
+rsub = conn.modules.subprocess
+result = rsub.check_output('docker ps |grep alpine-poi | wc -l', shell=True)
+alston2="alston2:"+result.decode('utf-8').strip()
+
+#sys.exit()
+#ros.system('docker restart 62eab82f7759') 
+#print('after restart')
+#time.sleep(11)
+#print('after wait')
+
+
+
+
+def send_msg(kw):
+    params = {"message": kw}  
+    r = requests.post("https://notify-api.line.me/api/notify",headers=headers, params=params)
+
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/google_poi?charset=utf8mb4')
+cursor=db.query('SELECT count(*) as cnt FROM google_poi.shop_list3;')
+cnt=''
+for c in cursor:
+    cnt=str(c['cnt'])
+#result="\n"
+#idx=0
+#for c in cursor:
+#    idx+=1
+#    if idx<=3:
+#        elmt=c['kw'].split(' ')[0]
+#        result+="["+str(c['ranking'])+"]:"+elmt+"\n"
+
+#msg="\n機器人執行次數:"+str(idx)
+msg="shop_list3: "+cnt+"\n"
+msg+=alston1+"\n"
+msg+=alston2
+send_msg(msg)
+

+ 53 - 0
INNNews/routine_click.py

@@ -0,0 +1,53 @@
import time
import json
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
import os
import urllib.parse
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import codecs
import random
import requests
import datetime
import dataset
import traceback
import sys
import fire
#from INNNews import notifytest


# Client pool; `prob` below weights the random pick: hhh 4/8, 毛怪 2/8, 清原 2/8.
probdict = {
    0: {'client': 'hhh', 'domain': 'hhh.com.tw'},
    1: {'client': '毛怪', 'domain': 'beastparadise.net'},
    2: {'client': '清原', 'domain': 'taroboba-yuan.com'},
}

db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')

prob = [0, 0, 0, 0, 1, 1, 2, 2]
idx = random.choice(prob)

client = probdict[idx]['client']
domain = probdict[idx]['domain']

term = None
address = "192.168.192.45:9922"
# Bind the client name as a query parameter instead of concatenating it into
# the SQL string (avoids quoting/injection problems).
cursor = db.query('SELECT term FROM seo.selected_kw WHERE client = :client ORDER BY rand() LIMIT 1',
                  client=client)
for c in cursor:
    term = c['term']
    break

# Guard: without this, an empty result set would format None into the command.
if term is None:
    print('no keyword found for client', client)
    sys.exit()

# Pick the interpreter name available on this platform.
if os.name == 'nt':
    py = 'python'
else:
    py = 'python3'


#set WEBDRIVER=C:/portable/webdriver/chrome98/chromedriver.exe
#os.system(py+' general_remote_click.py get --kw="'+term+'" --domain='+domain+' --address="'+address+'" --epath="C:/portable/webdriver/chrome98/chromedriver.exe"')
os.system(py + ' general_remote_click.py get --kw="' + term + '" --domain=' + domain + ' --address="' + address + '"')

+ 70 - 0
INNNews/run_jared.py

@@ -0,0 +1,70 @@
+import sys
+import codecs
+import traceback
+import requests
+import re
+import random
+import urllib
+import json
+import fire
+import datetime
+import os
+import redis
+import time
+
+def run_once(pport,dockername):
+#    mywords=['邱小莫 元宇宙 集仕多','元宇宙策展 集仕多','引新聞 集仕多','集仕多 元宇宙','集仕多 策展','集仕多 邱小莫','集仕多 主播','南橘','茶葉直播','連千毅nft','南投龍華宮','元欣','引新聞 炸物']
+#    mywords=['邱小莫 元宇宙 集仕多','元宇宙策展 集仕多','引新聞 集仕多','集仕多 元宇宙','集仕多 策展','集仕多 邱小莫','集仕多 主播','南橘','茶葉直播','連千毅nft','南投龍華宮','元欣','引新聞 炸物']
+#    mywords=['紅妃q木瓜','公園的好處','賣茶葉直播','引新聞','引新聞 集仕多','主播 引新聞','逗比 引新聞','南橘 引新聞','茶葉直播 引新聞','連千毅nft 引新聞','南投龍華宮 引新聞','元欣 引新聞' ,'引新聞 炸物','集仕多 site:innews.com.tw','晚安小雞造假']
+    mywords=['引新聞','引新聞 邱小莫']
+
+    if True:
+        kw={}
+        kw['kw']=random.choice(mywords)
+        intval=0
+
+        print(    'python3 local_general_clickbot.py get --kw="'+kw['kw']+'" --domain=innews.com.tw --port='+str(pport))
+        if os.name=='nt':
+           intval= os.system('python local_general_clickbot.py get --kw="'+kw['kw']+'" --domain=innews.com.tw --port='+str(pport))
+
+        else:
+            intval=os.system('python3 local_general_clickbot.py get --kw="'+kw['kw']+'" --domain=innews.com.tw --port='+str(pport))
+        print('等待')
+        print(intval)
+        if intval==-1:
+            print('-1')
+            sys.exit()
+
+class JParams(object):
+
+  def get(self, port=9222):
+    print(port)
+    while True:
+        try:
+            os.system('docker container restart tiny1')
+            os.system('docker container restart tiny2')
+            os.system('docker container restart tiny3')
+            time.sleep(1)
+            run_once( 9922 ,'tiny1'  )
+            run_once( 9923 ,'tiny2'  )
+            run_once( 9924  ,'tiny3' )
+
+            time.sleep(15)
+
+        except:
+            print('exception.....')
+#            os.system('docker container restart tiny1')
+#            time.sleep(15)
+
+if __name__ == '__main__':
+    while True:
+        os.system('docker container restart tiny1')
+        os.system('docker container restart tiny2')
+        os.system('docker container restart tiny3')
+        time.sleep(1)
+        run_once( 9922 ,'tiny1'  )
+        run_once( 9923 ,'tiny2'  )
+        run_once( 9924  ,'tiny3' )
+
+
+#  fire.Fire(JParams)

+ 39 - 0
INNNews/run_sheet.py

@@ -0,0 +1,39 @@
+import sys
+import codecs
+import traceback
+import requests
+import re
+import pandas as pd
+import random
+import urllib
+import json
+import gspread
+import datetime
+from gspread_pandas import Spread, Client
+from oauth2client.service_account import ServiceAccountCredentials
+import os
+import redis
+import time
+
+# Keyword list is cached in redis under 'innews_sheet' as a JSON array of
+# {'kw': ...} dicts.
+r = redis.Redis(host='db.ptt.cx', port=6379, db=1,password='choozmo9')
+js=r.get('innews_sheet')
+jsobj=json.loads(js)
+while True:
+    kw=random.choice(jsobj)  # first pick: one entry (three keyword groups)
+    
+    kw_innews = '引新聞 ' # second pick: two random keyword groups prefixed with the brand term
+    # NOTE(review): random.choices may pick the same index twice, and
+    # kw_innews is only used by the commented-out command below.
+    num = random.choices([0,1,2],k=2)
+    for i in num:
+        kw_innews+=kw['kw'].split(' ')[i]+' '
+    print(kw)
+    
+    
+    # os.chdir('/Users/zooeytsai/kw_tools/INNNews')
+    print('python3 general_clickbot.py get --kw="'+kw['kw']+'" --domain=innews.com.tw')
+    os.system('python3 general_clickbot.py get --kw="'+kw['kw']+'" --domain=innews.com.tw')
+    print('等待')
+    time.sleep(120)
+# time.sleep(120)
+# os.system('python3 general_clickbot.py get --kw="'+kw_innews+'" --domain=innews.com.tw')
+
+# os.system('python notifytest.py send_msg')

+ 86 - 0
INNNews/run_sheet_2.py

@@ -0,0 +1,86 @@
+import sys
+import codecs
+import traceback
+import requests
+import re
+import pandas as pd
+import random
+import urllib
+import json
+import gspread
+import datetime
+from gspread_pandas import Spread, Client
+from oauth2client.service_account import ServiceAccountCredentials
+import os
+import redis
+import time
+import fire
+import dataset
+
+
+def run_once(pport, dockername):
+    """Pick one random (term, domain) pair from seo.selected_kw (excluding
+    client 毛怪) and run general_clickbot.py against the Chrome debugger
+    port `pport`.  `dockername` is unused here; callers restart the
+    container themselves.
+    """
+    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+    lst = []
+    
+    cursor = db.query('select term, domain from seo.selected_kw where client!="毛怪"')
+    for c in cursor:
+        lst.append([c['term'], c['domain']])
+    
+    obj = random.choice(lst)
+    kw = obj[0]
+    domain = obj[1]
+    print(kw, domain)
+    
+    # NOTE(review): kw is interpolated into a shell command; a keyword
+    # containing a double quote would break the command line.
+    intval = os.system(f'python3 general_clickbot.py get --kw="{kw}" --domain="{domain}" --port="{str(pport)}"')
+    
+    print('執行完成genetal_clickbot')
+    
+    # os.system() returns -1 only when the child process could not be spawned.
+    if intval == -1:
+        print('-1')
+        sys.exit()
+
+
+def run_specific(pport, dockername):
+    """Same as run_once() but restricted to client 神助物流.
+
+    `dockername` is unused here; callers restart the container themselves.
+    """
+    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+    lst = []
+    
+    cursor = db.query('select term, domain from seo.selected_kw where client="神助物流"')
+    for c in cursor:
+        lst.append([c['term'], c['domain']])
+    
+    obj = random.choice(lst)
+    kw = obj[0]
+    domain = obj[1]
+    print(kw,domain)
+
+    intval = os.system(f'python3 general_clickbot.py get --kw="{kw}" --domain="{domain}" --port="{str(pport)}"')    
+    print('執行完成神助genetal_clickbot')
+
+    # os.system() returns -1 only when the child process could not be spawned.
+    if intval == -1:
+        print('-1')
+        sys.exit()
+
+
class JParams(object):
    """python-fire CLI: restart the headless-Chrome containers and run one
    click round per container, retrying on failure."""

    def get(self, port=9222):
        # `port` is kept for CLI compatibility; each run_* call below targets
        # its own fixed (debugger port, container) pair.
        while True:
            try:
                os.system('docker container restart tiny1')
                os.system('docker container restart tiny2')
                os.system('docker container restart tiny3')
                os.system('docker container restart tiny8')
                time.sleep(1)
                run_once(9922, 'tiny1')
                run_once(9923, 'tiny2')
                run_once(9924, 'tiny3')
                run_specific(9929, 'tiny8')
                time.sleep(20)
                break
            except Exception:
                # The original bare `except:` swallowed every error silently
                # (including SystemExit raised by run_once).  Log the failure,
                # let SystemExit propagate, then restart and retry.
                traceback.print_exc()
                os.system('docker container restart tiny1')
                time.sleep(15)


if __name__ == '__main__':
    fire.Fire(JParams)

+ 60 - 0
INNNews/run_sns.py

@@ -0,0 +1,60 @@
+import sys
+import codecs
+import traceback
+import requests
+import re
+import pandas as pd
+import random
+import urllib
+import json
+import gspread
+import datetime
+from gspread_pandas import Spread, Client
+from oauth2client.service_account import ServiceAccountCredentials
+import os
+import redis
+import time
+import fire
+import dataset
+
+
+def run_once(pport, dockername):
+    """Pick one random (term, url, client) row from seo.sns_kw and run
+    sns_clickbot.py against the Chrome debugger port `pport`.
+
+    `dockername` is unused here; callers restart the container themselves.
+    """
+    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+    lst = []
+    
+    cursor = db.query('select term, url, client from seo.sns_kw')
+    for c in cursor:
+        lst.append([c['term'], c['url'], c['client']])
+    
+    obj = random.choice(lst)
+    kw = obj[0]
+    url = obj[1]
+    client = obj[2]
+    print(kw, url)
+    
+    intval = os.system(f'python3 sns_clickbot.py get --kw="{kw}" --url="{url}" --client="{client}" --port="{str(pport)}"')
+    
+    print('執行完成genetal_clickbot')
+    
+    # os.system() returns -1 only when the child process could not be spawned.
+    if intval == -1:
+        print('-1')
+        sys.exit()
+
+
class JParams(object):
    """python-fire CLI: restart the tiny6 container and run one SNS click
    round against its Chrome debugger port."""

    def get(self, port=9222):
        while True:
            try:
                os.system('docker container restart tiny6')
                time.sleep(1)
                # BUG FIX: this call originally sat after `break` (outside the
                # try), so it was unreachable and the click job never ran.
                run_once(9927, 'tiny6')
                break
            except Exception:
                traceback.print_exc()
                os.system('docker container restart tiny6')
                time.sleep(15)

if __name__ == '__main__':
    fire.Fire(JParams)

+ 113 - 0
INNNews/sns_clickbot.py

@@ -0,0 +1,113 @@
+import time
+import json
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+import os
+import urllib.parse
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.chrome.service import Service
+from selenium.webdriver.support import expected_conditions as EC
+import codecs
+import random
+import requests
+import datetime
+import dataset
+import time
+import traceback
+import sys
+import fire
+#import pymysql
+#pymysql.install_as_MySQLdb()
+
+
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+table=db['sns_log']
+driver = None
+
+
def rua():
    """Return a random desktop-browser User-Agent string."""
    pool = [
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:73.0) Gecko/20100101 Firefox/73.0",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0",
        # BUG FIX: a missing trailing comma on the next entry silently
        # concatenated two UA strings into one invalid value.
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 OPR/68.0.3618.125",
    ]
    return random.choice(pool)
+
+
+def empty_query(q):
+    """Issue a plain Google search for q without clicking anything."""
+    global driver
+    googleurl='https://www.google.com/search?q='+urllib.parse.quote(q)
+    driver.get(googleurl)
+    time.sleep(3)
+
+
+def process_query(qs):
+    """Search Google (top 100, zh-TW) for qs[0]; if an organic result's URL
+    equals qs[1], log it to seo.sns_log and click it.
+
+    qs: (keyword, target_url, client_name, ...) tuple.
+    """
+    q=qs[0]
+    url=qs[1]
+    client=qs[2]
+    global driver
+    escaped_search_term = urllib.parse.quote(q)
+    googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(escaped_search_term, 100, 'zh-TW')
+    print(googleurl)
+    driver.get(googleurl)
+    time.sleep(10)
+    # fname=term.replace(' ','_')
+    # driver.save_screenshot('c:/tmp/seo/'+fname+'.png')
+    # df=pd.DataFrame()
+
+    # Organic result links (Google SERP markup at the time of writing).
+    elmts = driver.find_elements_by_xpath("//div[@class='yuRUbf']/a")
+    idx = 1
+    for elmt in elmts:
+        href=elmt.get_attribute('href')
+        txt=elmt.text
+        if len(txt)>10:
+            # NOTE(review): exact string equality -- a trailing slash or query
+            # string on either side prevents a match; confirm this is intended.
+            if href == url:
+                print('clicked....')
+                print(href)
+                print(txt)
+                print("ranking", idx)
+                table.insert({'kw':q,'client':client,'ranking':idx,'title':txt,'url':href,'dt':datetime.datetime.now()})
+                webdriver.ActionChains(driver).move_to_element(elmt).perform()
+                webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+                time.sleep(5)
+                break
+        idx+=1
+
def run_once(q):
    """Attach a Chrome session to the local debugger port and run one SNS
    click round.

    q: (keyword, target_url, client_name, debugger_port) tuple.
    """
    global driver
    s = Service('/root/driver/chromedriver')
    user_agent = rua()
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--remote-debugging-port=9222')
    # BUG FIX: the debugger port is q[3]; q[2] is the client name and produced
    # an invalid "127.0.0.1:<client>" address.
    options.add_experimental_option("debuggerAddress", f"127.0.0.1:{q[3]}")
    # NOTE(review): when attaching via debuggerAddress the launch flags above
    # (headless/user-agent/incognito) likely have no effect -- confirm.
    options.add_argument("--user-agent=" + user_agent)
    options.add_argument("--incognito")

    driver = webdriver.Chrome(options=options, service=s)

    driver.delete_all_cookies()
    driver.set_window_size(1400, 1000)

    process_query(q)
    time.sleep(3)
    driver.quit()
+
+
+class JParams(object):
+  """python-fire CLI: `get` runs a single SNS click round."""
+
+  def get(self, kw,url,client,port):
+    print('關鍵字',kw)
+    run_once( (kw,url,client,port)  )
+
+
+if __name__ == '__main__':
+  fire.Fire(JParams)
+

+ 43 - 0
INNNews/spread_test.py

@@ -0,0 +1,43 @@
+#!/usr/bin/python3
+import sys
+import codecs
+import traceback
+import requests
+import re
+import pandas as pd
+import random
+import urllib
+import json
+import gspread
+import datetime
+from gspread_pandas import Spread, Client
+from oauth2client.service_account import ServiceAccountCredentials
+import os
+
+def save_sheet(df,filename,tabname,startpos='A1'):
+    """Append DataFrame `df` to tab `tabname` of the Google spreadsheet
+    named `filename`, starting at cell `startpos` (no header replacement)."""
+
+    scope = ['https://spreadsheets.google.com/feeds',
+            'https://www.googleapis.com/auth/drive']
+
+    credentials = ServiceAccountCredentials.from_json_keyfile_name('c:\\keys\\innnews-d27b01472ba2.json', scope)
+    # NOTE(review): `gc` is never used; Spread() authenticates with the
+    # credentials object directly.
+    gc = gspread.authorize(credentials)
+    spread = Spread(filename,creds=credentials)
+
+    spread.df_to_sheet(df, index=False, sheet=tabname, start=startpos, replace=False)
+
+def get_sheets(filename):
+    """Print the tab titles of the Google spreadsheet named `filename`."""
+
+    scope = ['https://spreadsheets.google.com/feeds',
+            'https://www.googleapis.com/auth/drive']
+
+    credentials = ServiceAccountCredentials.from_json_keyfile_name('c:\\keys\\innnews-d27b01472ba2.json', scope)
+    gc = gspread.authorize(credentials)
+#    files=gc.list_spreadsheet_files()
+#    print(files)
+    spread = Spread(filename,creds=credentials)
+    for ws in spread.sheets:
+        print(ws.title)
+
+get_sheets('引新聞xSEO團隊共用表單')
+#    spread.df_to_sheet(df, index=False, sheet=tabname, start=startpos, replace=False)
+#

+ 109 - 0
SEO/click_and_save.py

@@ -0,0 +1,109 @@
+import random
+import sys
+import dataset
+from selenium import webdriver
+import traceback
+import datetime
+import codecs
+import time
+import urllib
+import argparse
+import schedule
+import logging
+import sys
+from logging.handlers import SysLogHandler
+import socket
+import pandas as pd
+
+_LOG_SERVER = ('hhh.ptt.cx', 514)
+logger = logging.getLogger('clickbot_100')
+handler1 = SysLogHandler(address=_LOG_SERVER,socktype=socket.SOCK_DGRAM)
+logger.addHandler(handler1)
+logger.debug('[click_and_save][DB]begin')
+
+
def restart_browser():
    """Launch a fresh headless Chrome sized tall enough to render 100 results."""
    opts = webdriver.ChromeOptions()
    opts.add_argument('--headless')
    browser = webdriver.Chrome(options=opts)
    browser.set_window_size(950, 6000)
    return browser
+
+
+def process_one():
+    """Pick one random click job (category 202204), fetch the Google top-100
+    for its keyword, record every result row to save_result_listclick, and
+    click the first result matching the job's domain."""
+    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+    lst=[]
+    table=db['save_result_listclick']
+    cursor=db.query('select kw,page,domain from seo_clickjobs where category="202204" order by rand()')
+    for c in cursor:
+        lst.append(c)
+
+
+    entry=random.choice(lst)
+    term=entry['kw']
+    print(term)
+    domain=entry['domain']
+    logger.debug('[clickbot_100]['+term+']')
+    driver=restart_browser()
+    escaped_search_term=urllib.parse.quote(term)
+    googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(escaped_search_term, 100,'zh-TW')
+
+    print(googleurl)
+    driver.get(googleurl)
+    time.sleep(6)
+    fname=term.replace(' ','_')
+    df=pd.DataFrame()
+
+    # Organic result links (Google SERP markup at the time of writing).
+    elmts=driver.find_elements_by_xpath("//div[@class='yuRUbf']/a")
+
+    clickelmt=None
+    cnt=1
+    datadict={'搜尋詞':[],'結果標題':[],'結果網址':[],'結果名次':[]}
+    
+    for elmt in elmts:
+        try:
+            href=elmt.get_attribute('href')
+            if domain in href:
+                # Remember the element; it is clicked after the scan so the
+                # full ranking is recorded first.
+                clickelmt=elmt
+                logger.debug('[clickbot_100]['+term+']['+str(cnt)+']')
+
+            print(href)
+            print(elmt.text)
+            datadict['搜尋詞'].append(term)
+            datadict['結果標題'].append(elmt.text)
+            datadict['結果網址'].append(href)
+            datadict['結果名次'].append(str(cnt))
+
+            table.insert({'title':elmt.text,'url':href,'keyword':term,'dt':datetime.datetime.now(),'num':cnt})
+            cnt+=1
+        except:
+            print('href2 exception')
+            traceback.print_exc()
+    if clickelmt:
+        webdriver.ActionChains(driver).move_to_element(clickelmt).perform()
+        webdriver.ActionChains(driver).move_to_element(clickelmt).click().perform()
+    # No results at all: likely blocked/captcha -- bail out of the process.
+    if len(datadict['結果標題'])<=0:
+        print('None')
+        driver.quit()
+        sys.exit()
+    df['搜尋詞']=datadict['搜尋詞']
+    df['結果標題']=datadict['結果標題']
+    df['結果網址']=datadict['結果網址']
+    df['結果名次']=datadict['結果名次']
+
+    driver.quit()
+
+# Run once unconditionally; with --loop, keep re-running on a schedule.
+process_one()
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--loop')
+args = parser.parse_args()
+
+if args.loop:
+
+#    schedule.every(6).minutes.do(process_one)
+    schedule.every(0.4).minutes.do(process_one)
+
+    while True:
+        schedule.run_pending()
+        time.sleep(1)

+ 13 - 7
SEO/clickbot_100.py

@@ -15,12 +15,17 @@ import sys
 from logging.handlers import SysLogHandler
 import socket
 import pandas as pd
-
+import socket
+import os
 _LOG_SERVER = ('hhh.ptt.cx', 514)
 logger = logging.getLogger('clickbot_100')
 handler1 = SysLogHandler(address=_LOG_SERVER,socktype=socket.SOCK_DGRAM)
 logger.addHandler(handler1)
-logger.debug('[clickbot_100][清原]begin')
+#logger.debug('[clickbot_100][清原]begin')
+
+hname=socket.gethostname()
+pid=str(os.getpid())
+logger.fatal('[clickbot_100]['+hname+']['+pid+']begin')
 
 
 def restart_browser():
@@ -34,8 +39,9 @@ def restart_browser():
 def process_one():
     db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
     lst=[]
-    table=db['save_result']
-    cursor=db.query('select term from selected_kw where client="清原" and term not in (SELECT distinct(keyword) FROM seo.save_result where url like "%taroboba-yuan.com%" and datediff(now(),dt)=0)')
+    table=db['google_rank']
+    cursor = db.query('select term from seo.selected_kw')
+    # cursor=db.query('select term from selected_kw and term not in (SELECT distinct(keyword) FROM ig_tags.save_result where url like "%beastparadise.net%" and datediff(now(),dt)=0)')
     for c in cursor:
         lst.append(c['term'])
 
@@ -50,9 +56,9 @@ def process_one():
     driver.get(googleurl)
     time.sleep(6)
     fname=term.replace(' ','_')
-    driver.save_screenshot('c:/tmp/seo/'+fname+'.png')
+    # driver.save_screenshot('c:/tmp/seo/'+fname+'.png')
     df=pd.DataFrame()
-
+    # driver.get_screenshot_as_file("/Users/zooeytsai/排名100.png")
     elmts=driver.find_elements_by_xpath("//div[@class='yuRUbf']/a")
 
     clickelmt=None
@@ -90,7 +96,7 @@ def process_one():
     df['結果網址']=datadict['結果網址']
     df['結果名次']=datadict['結果名次']
 
-    df.to_excel('c:/tmp/seo/'+fname+".xls")
+    df.to_excel('/Users/zooeytsai/'+fname+".xls")
 
     driver.quit()
 

+ 96 - 0
SEO/crawl_web.py

@@ -0,0 +1,96 @@
+import traceback
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+import os
+import datetime
+import urllib.parse
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+import codecs
+import random
+from bs4 import BeautifulSoup
+import requests
+import time
+# import rpyc
+import sys
+import docker
+# import googlesearch
+import codecs
+import sys
+import time
+import dataset
+import os
+import html2text
+
+def process_one(driver):
+    """Scrape the organic results of the current Google SERP page.
+
+    Returns a list of {'title': ..., 'url': ...} dicts.
+    """
+    lst=[]
+    elmts=driver.find_elements_by_xpath("//div[@class='yuRUbf']/a")
+    for elmt in elmts:
+        try:
+            href=elmt.get_attribute('href')
+#            print(href)
+            txt=elmt.text.split('\n')
+            print(txt[0])
+            lst.append({'title':txt[0],'url':href})
+        except:
+            print('href2 exception')
+            traceback.print_exc()
+    return lst
+
+def process_query(driver,qs,number_results=10,language_code='zh-TW',enable_next=True):
+    """Search Google for `qs` and collect result titles/URLs.
+
+    When enable_next is True, follow the "next page" link until it is gone.
+    Returns the accumulated list from process_one() across pages.
+    """
+    escaped_search_term=urllib.parse.quote(qs)
+    googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(escaped_search_term, number_results+1,language_code)
+    print(googleurl)
+    driver.get(googleurl)
+    time.sleep(3)
+    totallst=[]
+    while True:
+        lst=process_one(driver)
+        totallst+=lst
+        try:
+            if enable_next:
+                time.sleep(3)
+                elmt=driver.find_element_by_xpath("//a[@id='pnnext']")
+                webdriver.ActionChains(driver).move_to_element(elmt).perform()
+                webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+            else:
+                break
+        except:
+            # No pnnext link (last page) or click failure ends the crawl.
+            traceback.print_exc()
+            print('pnnext exception')
+            break
+        time.sleep(1.5)
+    return totallst
+
+
+result=[]
+driver=None
+
+def restart_browser():
+    """Start a local Chrome session sized for SERP scraping."""
+#    os.system('docker container restart p4444')
+#    time.sleep(10)
+
+    options = webdriver.ChromeOptions()
+#    options.add_argument("--proxy-server=http://80.48.119.28:8080")
+#    driver=webdriver.Chrome(executable_path='/Users/zooeytsai/Downloads/chromedriver',options=options)
+
+    driver=webdriver.Chrome(desired_capabilities=options.to_capabilities())
+    #driver = webdriver.Remote(
+    #    command_executor='http://127.0.0.1:4444/wd/hub',
+    #desired_capabilities=options.to_capabilities())
+#    desired_capabilities=DesiredCapabilities.CHROME)
+    driver.set_window_size(1400,1000)
+    return driver
+
+# Crawl one site-restricted query and store every result row.
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+table=db['kw_url_search_result']
+driver=restart_browser()
+lst=process_query(driver,'班尼斯 site:mobile01.com',number_results=50,language_code='zh-TW',enable_next=False)
+for l in lst:
+    table.insert(l)
+
+print(lst)
+
+#print(html2text.html2text("<p><strong>Zed's</strong> dead baby, <em>Zed's</em> dead.</p>"))

+ 97 - 0
SEO/exp_100.py

@@ -0,0 +1,97 @@
+import random
+import sys
+import dataset
+from selenium import webdriver
+import traceback
+import datetime
+import codecs
+import time
+import urllib
+import argparse
+import schedule
+
+import logging
+import sys
+from logging.handlers import SysLogHandler
+import socket
+import pandas as pd
+import socket
+import os
+#logger.debug('[clickbot_100][清原]begin')
+
+hname=socket.gethostname()
+pid=str(os.getpid())
+
+
+def restart_browser():
+    """Launch a headless Chrome sized tall enough to render 100 results."""
+    options = webdriver.ChromeOptions()
+    options.add_argument('--headless') 
+    driver=webdriver.Chrome(options=options)
+    driver.set_window_size(950,6000)
+    return driver
+
+
+def process_one():
+    """Fetch the Google top-100 for a hard-coded keyword, export the ranking
+    to an Excel file, and click the taroboba-yuan.com result if present."""
+    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+    lst=[]
+#    table=db['google_rank']
+#    cursor = db.query('select term from seo.selected_kw')
+    # cursor=db.query('select term from selected_kw and term not in (SELECT distinct(keyword) FROM ig_tags.save_result where url like "%beastparadise.net%" and datediff(now(),dt)=0)')
+#    for c in cursor:
+#        lst.append(c['term'])
+
+#    term=random.choice(lst)
+    # NOTE(review): keyword is hard-coded for this experiment.
+    term='tha 娛樂城'
+    print(term)
+    driver=restart_browser()
+    escaped_search_term=urllib.parse.quote(term)
+    googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(escaped_search_term, 100,'zh-TW')
+
+    print(googleurl)
+    driver.get(googleurl)
+    time.sleep(6)
+    fname=term.replace(' ','_')
+    # NOTE(review): Windows-only output paths below.
+    driver.save_screenshot('c:/tmp/seo/'+fname+'.png')
+    df=pd.DataFrame()
+    # driver.get_screenshot_as_file("/Users/zooeytsai/排名100.png")
+    elmts=driver.find_elements_by_xpath("//div[@class='yuRUbf']/a")
+
+    clickelmt=None
+    cnt=1
+    datadict={'搜尋詞':[],'結果標題':[],'結果網址':[],'結果名次':[]}
+    
+    for elmt in elmts:
+        try:
+            href=elmt.get_attribute('href')
+            if 'taroboba-yuan.com' in href:
+                clickelmt=elmt
+
+            print(href)
+            print(elmt.text)
+            datadict['搜尋詞'].append(term)
+            datadict['結果標題'].append(elmt.text)
+            datadict['結果網址'].append(href)
+            datadict['結果名次'].append(str(cnt))
+
+            cnt+=1
+        except:
+            print('href2 exception')
+            traceback.print_exc()
+    if clickelmt:
+        webdriver.ActionChains(driver).move_to_element(clickelmt).perform()
+        webdriver.ActionChains(driver).move_to_element(clickelmt).click().perform()
+    if len(datadict['結果標題'])<=0:
+        print('None')
+        driver.quit()
+        sys.exit()
+    df['搜尋詞']=datadict['搜尋詞']
+    df['結果標題']=datadict['結果標題']
+    df['結果網址']=datadict['結果網址']
+    df['結果名次']=datadict['結果名次']
+
+#    df.to_excel('/Users/zooeytsai/'+fname+".xls")
+    df.to_excel('c:/tmp/seo/'+fname+".xls")
+
+    driver.quit()
+
+process_one()

+ 53 - 0
SEO/fb_comment.py

@@ -0,0 +1,53 @@
+from selenium.webdriver.common.keys import Keys
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.chrome.service import Service
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+import time
+import redis
+import json
+import numpy as np
+
+# Login/config placeholders.
+# NOTE(review): password and post URL are empty placeholders; fill before use.
+account = ['enjoylisteningswift@yandex.ru']
+pd = ['']
+path = '/Users/zooeytsai/Downloads/chromedriver 2'
+post_url = ['']
+
+
+def send_comment(text):
+    """Log into Facebook, open post_url[0] and post `text` as a comment.
+
+    NOTE(review): relies on hard-coded absolute XPaths that break whenever
+    Facebook changes its DOM.
+    """
+    s = Service(path)
+    driver = webdriver.Chrome(service=s)
+    driver.get('https://www.facebook.com/')
+    time.sleep(5)
+    a = driver.find_element(By.ID,"email")
+    p = driver.find_element(By.ID,"pass")
+    a.send_keys(account[0])
+    p.send_keys(pd[0])
+    time.sleep(3)
+    login = driver.find_element(By.XPATH,'/html/body/div[1]/div[2]/div[1]/div/div/div/div[2]/div/div[1]/form/div[2]/button')
+    login.click()
+    time.sleep(10)
+    driver.get(post_url[0])
+    time.sleep(5)
+    # comment = driver.find_element(By.XPATH,'/html/body/div[1]/div/div[1]/div/div[3]/div/div/div[1]/div[1]/div/div[2]/div/div/div/div[1]/div[5]/div/div[2]/div[2]/div[1]/form/div/div/div[1]/p').send_keys(text)
+    comment = driver.find_element(By.XPATH,'/html/body/div[1]/div/div[1]/div/div[3]/div/div/div[1]/div[1]/div/div[2]/div/div/div/div[1]/div[5]/div/div/div[2]/div[1]/form/div/div/div[1]/p')
+    comment.send_keys(text)
+    driver.implicitly_wait(5)
+    comment.send_keys(Keys.ENTER)
+    driver.implicitly_wait(5)
+    driver.quit()
+
+    
+def random_comment():
+    """Return a random message from the redis-cached 'yt_comment' JSON list."""
+    r = redis.Redis(host='db.ptt.cx', port=6379, db=0, password='choozmo9')
+    js = r.get('yt_comment')
+    messages = json.loads(js)
+    # ===============================
+    r = np.random.randint(0, len(messages))
+    print(messages[r])
+    return messages[r]
+
+
+if __name__ == "__main__":
+    send_comment(random_comment())

+ 102 - 0
SEO/fetch__url_content.py

@@ -0,0 +1,102 @@
+import traceback
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+import os
+import datetime
+import urllib.parse
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+import codecs
+import random
+from bs4 import BeautifulSoup
+import requests
+import time
+# import rpyc
+import sys
+import docker
+# import googlesearch
+import codecs
+import sys
+import time
+import dataset
+import os
+import html2text
+from userAgentRandomizer import userAgents
+from fp.fp import FreeProxy
+
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+db2 = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+
+
+# Only fetch URLs whose content has not been crawled yet.
+cursor=db.query('select url from kw_url_search_result where url not in (select url from url_content) order by rand()')
+
+def restart_browser():
+    """Restart the local proxy container (presumably Tor -- confirm), then
+    launch Chrome routed through it with a random user agent."""
+    os.system('docker container restart proxy1')
+    ua = userAgents()
+    user_agent = ua.random()        
+    time.sleep(8)
+    options = webdriver.ChromeOptions()
+#    options.add_argument("--headless")
+#    proxy = FreeProxy().get()
+#    print(proxy)
+#    sys.exit()
+    # Route traffic through the local SOCKS5 proxy exposed by proxy1.
+    options.add_argument('--proxy-server=socks5://127.0.0.1:9050')
+    options.add_argument("--user-agent=" +user_agent)
+    options.add_argument("--incognito")
+
+    driver=webdriver.Chrome(options=options)
+    driver.set_window_size(1400,1000)
+    driver.delete_all_cookies()
+
+    return driver
+
+
def clean_txt(txt):
    """Extract the post body from html2text output of a forum thread page.

    Collection starts after the line containing '  * __ 訂閱文章' and stops
    once a '__ 連結 __ 回報 ...' footer line has been consumed.  Prints and
    returns the collected text.
    """
    collected = []
    in_body = False
    for raw_line in txt.split("\n"):
        stripped = raw_line.strip()
        if stripped == '  * __':
            continue
        if stripped == '我要回覆':
            continue
        if in_body:
            collected.append(raw_line + "\n")
        elif '  * __ 訂閱文章' in raw_line:
            in_body = True
        if ('__ 連結 __ 回報 __ 只看樓主 __ 列印' in raw_line
                or '__ 連結 __ 回報 __ 只看此人 __ 列印' in raw_line):
            break
    fulltxt = "".join(collected)
    print(fulltxt)
    return fulltxt
+
+# Crawl each pending URL, convert its HTML to text and store it; rotate the
+# proxy/browser whenever Cloudflare's challenge page is detected.
+driver=restart_browser()
+table=db2['url_content']
+for c in cursor:
+    url=c['url']
+    print(c['url'])
+#    driver.get('https://whatismyipaddress.com/')
+#    time.sleep(9999)
+    driver.get(c['url'])
+    
+    time.sleep(5)
+    if 'Please Wait' in driver.title and 'Cloudflare' in driver.title:
+        driver=restart_browser()
+        continue
+    src=driver.page_source
+    h = html2text.HTML2Text()
+    h.ignore_links = True
+    txt=h.handle(src)
+    resulttxt=clean_txt(txt)
+    table.insert({'content':resulttxt,'url':url})
+    time.sleep(5)
+#    print()
+#    break
+
+
+#print(html2text.html2text("<p><strong>Zed's</strong> dead baby, <em>Zed's</em> dead.</p>"))

+ 108 - 0
SEO/general_clickjob.py

@@ -0,0 +1,108 @@
+import time
+import json
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+import os
+import urllib.parse
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+import codecs
+import random
+import requests
+import dataset
+import time
+import traceback
+import sys
+
+driver=None
+# LINE Notify headers.
+# NOTE(review): hard-coded token -- consider moving to configuration.
+headers = {
+        "Authorization": "Bearer " + "t35vhZtWNgvDNWHc3DJh0OKll3mcB9GvC8K2EAkBug2",
+        "Content-Type": "application/x-www-form-urlencoded"
+}
+
+
+
+def send_msg(kw):
+    """Push a LINE Notify message announcing the keyword being processed."""
+    params = {"message": "處理關鍵字: "+kw}  
+    r = requests.post("https://notify-api.line.me/api/notify",headers=headers, params=params)
+
+
+def empty_query(q):
+    """Issue a plain Google search for q without clicking anything."""
+    global driver
+    googleurl='https://www.google.com/search?q='+urllib.parse.quote(q)
+    driver.get(googleurl)
+    time.sleep(3)
+
+
def process_query(qs):
    """Search Google (top 100, zh-TW) for qs[0] and click the first organic
    result whose URL contains the domain qs[1].

    qs: (keyword, domain) tuple.
    """
    q = qs[0]
    domain = qs[1]
    global driver
    googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(urllib.parse.quote(q), 100, 'zh-TW')
    print(googleurl)
    driver.get(googleurl)
    time.sleep(6)

    # Organic result links (Google SERP markup at the time of writing).
    elmts = driver.find_elements_by_xpath("//div[@class='yuRUbf']/a")

    print(len(elmts))
#    driver.save_screenshot('c:/tmp/test.png')

    # Dead locals `idx`/`ranking` from the original were removed: they were
    # initialized but never updated or read.
    for elmt in elmts:
        href = elmt.get_attribute('href')
        txt = elmt.text
        # Skip short/ad-like entries; click only a real result for our domain.
        if len(txt) > 10 and domain in href:
            print('clicked....')
            print(href)
            print(txt)
            webdriver.ActionChains(driver).move_to_element(elmt).perform()
            webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
            break
+
+
+def run_once(q):
+    """Launch a fresh headless incognito Chrome, run one click round for the
+    (keyword, domain) tuple `q`, then tear the browser down."""
+    global driver
+    # NOTE(review): `result` is never used in this function.
+    result=[]
+    options = webdriver.ChromeOptions()
+    options.add_argument('--headless')
+#    options.add_argument("--user-agent=" +user_agent)
+    options.add_argument("--incognito")
+
+    driver = webdriver.Chrome(
+    options=options)
+
+    driver.delete_all_cookies()
+    driver.set_window_size(1400,1000)
+
+    print(q)
+    process_query(q)
+    time.sleep(3)
+    driver.quit()
+
+
+#lst=[{'kw':'幸福空間','domain':'hhh.com.tw','page':0}]
+# Load the full (term, domain) pool once, then click a random entry forever
+# with a randomized pause between rounds.
+lst=[]
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+
+cursor=db.query('select term,domain from selected_kw  order by rand()')
+
+for c in cursor:
+    lst.append(c)
+
+
+#for c in lst:
+while True:
+    try:
+        c=random.choice(lst)
+        run_once( (c['term'],c['domain'])   )
+    except:
+        traceback.print_exc()
+    sleepint=random.randint(320,520)
+    time.sleep(sleepint)
+

+ 141 - 0
SEO/general_gsrack.py

@@ -0,0 +1,141 @@
+import time
+import json
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+import os
+import urllib.parse
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.chrome.service import Service
+from selenium.webdriver.support import expected_conditions as EC
+import codecs
+import random
+import requests
+import datetime
+import dataset
+import time
+import traceback
+import sys
+import fire
+import random
+import pymysql
+
+pymysql.install_as_MySQLdb()
+
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+table = db['general_log']
+
+driver = None
+
+
+def rua():
+    pool = [
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:73.0) Gecko/20100101 Firefox/73.0",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 OPR/68.0.3618.125",
+    ]
+    return random.choice(pool)
+
+
+def empty_query(q):
+    global driver
+    googleurl = 'https://www.google.com/search?q=' + urllib.parse.quote(q)
+    driver.get(googleurl)
+    time.sleep(3)
+
+
+def process_query(qs):
+    q = qs[0]
+    domain = qs[1]
+    global driver
+    googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(urllib.parse.quote(q), 100, 'zh-TW')
+    print(googleurl)
+    driver.get(googleurl)
+    time.sleep(6)
+    
+    while True:
+        try:
+            elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
+            print('尋找')
+            break
+        except:
+            pass
+    
+    idx = 1
+    ranking = -1
+    print('搜尋結果數量', len(elmts))
+    #    driver.save_screenshot('c:/tmp/test.png')
+    
+    for elmt in elmts:
+        
+        href = elmt.get_attribute('href')
+        txt = elmt.text
+        if len(txt) > 10:
+            if domain in href:
+                print('clicked....')
+                print(href)
+                print(txt)
+                print("ranking", idx)
+                table.insert({'kw': q, 'domain': domain, 'ranking': idx, 'title': txt, 'url': href,
+                              'dt': datetime.datetime.now(), 'num': 1})
+                webdriver.ActionChains(driver).move_to_element(elmt).perform()
+                webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+                break
+        idx += 1
+
+
+def run_once(q):
+    global driver
+    result = []
+    s = Service('/root/driver/chromedriver')
+    # s = Service('/Users/zooeytsai/Downloads/chromedriver 2')
+    user_agent = rua()
+    options = webdriver.ChromeOptions()
+    options.add_argument('--headless')
+    options.add_argument('--remote-debugging-port=9222')
+    options.add_experimental_option("debuggerAddress", "127.0.0.1:{q[2]}")
+    options.add_argument("--user-agent=" + user_agent)
+    options.add_argument("--incognito")
+    
+    driver = webdriver.Chrome(options=options, service=s)
+    
+    driver.delete_all_cookies()
+    driver.set_window_size(1400, 1000)
+    
+    print('到此')
+    process_query(q)
+    time.sleep(3)
+    driver.quit()
+
+
+# for c in lst:
+# while True:
+#    try:
+#        c=random.choice(lst)
+#    except:
+#        traceback.print_exc()
+#    sleepint=random.randint(320,520)
+#    time.sleep(sleepint)
+
+class JParams(object):
+    
+    def get(self, kw, domain, port):
+        print(kw)
+        print(domain)
+        run_once((kw, domain, port))
+
+
+if __name__ == '__main__':
+    fire.Fire(JParams)
+    
+    
+    def get(self, kw, domain, port):
+        print('kw')
+        print(domain)
+        run_once((kw, domain, port))
+

+ 76 - 0
SEO/gnews_click.py

@@ -0,0 +1,76 @@
+import traceback
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+import os
+from selenium.webdriver.common.keys import Keys
+import datetime
+import urllib.parse
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+import codecs
+import random
+from bs4 import BeautifulSoup
+import requests
+import time
+# import rpyc
+import sys
+import docker
+# import googlesearch
+import codecs
+import sys
+import time
+import dataset
+import os
+import html2text
+from userAgentRandomizer import userAgents
+
+
+def restart_browser():
+    os.system('docker container restart proxy1')
+    ua = userAgents()
+    user_agent = ua.random()        
+    time.sleep(8)
+    options = webdriver.ChromeOptions()
+#    options.add_argument("--headless")
+    options.add_argument('--proxy-server=socks5://127.0.0.1:9050')
+    options.add_argument("--user-agent=" +user_agent)
+    options.add_argument("--incognito")
+
+    driver=webdriver.Chrome(options=options)
+    driver.set_window_size(1400,1000)
+    driver.delete_all_cookies()
+
+    return driver
+
+
+driver=restart_browser()
+driver.get('https://news.google.com/topstories?hl=zh-TW&gl=TW&ceid=TW:zh-Hant')
+time.sleep(7)
+elmt=driver.find_element(By.XPATH,"//input[@aria-label='搜尋']")
+
+title_lst=['《咒術迴戰》降臨全家!不只推出獨家必收集點周邊 還能在1:1還原名場景與主角合照 引新聞',
+'梅雨季正是驗屋好時機!專家分享小撇步教你避免買到漏水屋 引新聞',
+'5月最夯球鞋款式大公開!女性消費者最愛「這個色調」 引新聞',
+'有影/本田仁美加入AKB48八年首登C位驚呼夢想成真!賣力學中文想挑戰翻唱《那些年》 引新聞',
+'萬綠叢中一點紅!白石麻衣化身「自衛隊」女教官 加入町田啓太「肉體派」新劇養眼陣容 引新聞',
+'超商變身辦公室!7-ELEVEN首創付費「多功能包廂專區」 遠距辦公上課更「便」民、開幕5折優惠 引新聞']
+
+title=random.choice(title_lst)
+#if elmt is None:
+#    elmt=driver.find_element(By.XPATH,"//input[@aria-label='搜尋']")
+if elmt is not None:
+    elmt.send_keys(title)
+    elmt.send_keys(Keys.ENTER)
+    elmt.send_keys(Keys.ENTER)
+
+time.sleep(7)
+
+elmts=driver.find_elements(By.XPATH,"//div[@jsname='esK7Lc']//div[@class='xrnccd']//a[@jsname='hXwDdf']")
+print(elmts[0].get_attribute('href'))
+print(elmts[0].text)
+
+
+time.sleep(9)
+

+ 10 - 0
SEO/gsc-save-credential.py

@@ -0,0 +1,10 @@
+import searchconsole
+#account = searchconsole.authenticate(client_config='c:/keys/client_secret_162277274609-v1fsq5iscscl7e2ta4a8tc0og5tehl44.apps.googleusercontent.com.json',serialize='out.json')
+account = searchconsole.authenticate(client_config='c:/keys/client_secret_162277274609-v1fsq5iscscl7e2ta4a8tc0og5tehl44.apps.googleusercontent.com.json',credentials='c:/keys/out.json')
+
+#account.redirect_uri = 'https://localhost'
+webproperty = account['https://innews.com.tw/']
+
+report = webproperty.query.range('today', days=-7).dimension('query').get()
+print(report.rows)
+#http://localhost:8080

+ 38 - 0
SEO/month_kw_rank.py

@@ -0,0 +1,38 @@
+import pandas as pd
+import time
+
+def day_col(row):
+    result = row['dt'][0:10]
+    return result
+
+
+custom_name = ['毛怪','火柴人','清原','仁本']
+for name in custom_name:
+    df = pd.read_csv(f"/Users/zooeytsai/Documents/{name}5月關鍵字排名進前十名.csv")
+    df['dt2'] = df.apply(day_col, axis=1)
+    df = df.drop_duplicates(subset=['dt2','kw'])
+    df_kw_rank = df[['dt2','kw','ranking']].sort_values(by='dt2')
+    df_kw_rank_2 = df_kw_rank.reset_index(drop=True)
+    df_kw_rank_2.columns = ['日期','關鍵字','名次']
+    print(df_kw_rank_2)
+    # df_=pd.DataFrame(columns=list('  '))
+    # print(pd.concat([df_kw_rank,df_]))
+    data = []
+    num = df.groupby('dt2',as_index=False).size()
+    for index,row in num.iterrows():
+        data.append([row['dt2'],row['size'],20,row['size']*20])
+    df_first = pd.DataFrame(data,columns=['日期','關鍵字出現次數','首頁日費','首頁小計'])
+    #前三名
+    df_top_3 = df.loc[df['ranking']<=3]
+    num_top_3 = df_top_3.groupby('dt2',as_index=False).size()
+    data_2 = []
+    for index,row in num_top_3.iterrows():
+        data_2.append([row['dt2'],row['size'],40,row['size']*40])
+    df_second = pd.DataFrame(data_2,columns=['日期','前3名字組數量','前3名字組日費','前3名字組小計'])
+    df_result = pd.merge(df_first,df_second,on='日期',how='outer').fillna(0)
+    new = pd.concat([df_kw_rank_2,df_result],axis=1)
+    # df_result.insert(0,'日期 ',df_kw_rank['日期'])
+    # df_result.insert(1,'關鍵字 ',df_kw_rank['關鍵字'])
+    # df_result.insert(2,'名次 ',df_kw_rank['名次'])
+    new.to_csv(f"/Users/zooeytsai/Documents/{name}5月績效報表2.csv",index=False)
+    time.sleep(60)

+ 117 - 0
SEO/new_100.py

@@ -0,0 +1,117 @@
+import random
+import sys
+import dataset
+from selenium import webdriver
+import traceback
+import datetime
+import codecs
+import time
+import urllib
+import argparse
+import schedule
+
+import logging
+import sys
+from logging.handlers import SysLogHandler
+import socket
+import pandas as pd
+import socket
+import os
+_LOG_SERVER = ('hhh.ptt.cx', 514)
+logger = logging.getLogger('clickbot_100')
+handler1 = SysLogHandler(address=_LOG_SERVER,socktype=socket.SOCK_DGRAM)
+logger.addHandler(handler1)
+#logger.debug('[clickbot_100][清原]begin')
+
+hname=socket.gethostname()
+pid=str(os.getpid())
+logger.fatal('[clickbot_100]['+hname+']['+pid+']begin')
+
+
+def restart_browser():
+    options = webdriver.ChromeOptions()
+    options.add_argument('--headless') 
+    driver=webdriver.Chrome(options=options)
+    driver.set_window_size(950,6000)
+    return driver
+
+
+def process_one():
+    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+    lst=[]
+    table=db['google_rank']
+    cursor = db.query('select term from seo.selected_kw')
+    # cursor=db.query('select term from selected_kw and term not in (SELECT distinct(keyword) FROM ig_tags.save_result where url like "%beastparadise.net%" and datediff(now(),dt)=0)')
+    for c in cursor:
+        lst.append(c['term'])
+
+    term=random.choice(lst)
+    print(term)
+    logger.debug('[clickbot_100]['+term+']')
+    driver=restart_browser()
+    escaped_search_term=urllib.parse.quote(term)
+    googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(escaped_search_term, 100,'zh-TW')
+
+    print(googleurl)
+    driver.get(googleurl)
+    time.sleep(6)
+    fname=term.replace(' ','_')
+    # driver.save_screenshot('c:/tmp/seo/'+fname+'.png')
+    df=pd.DataFrame()
+    # driver.get_screenshot_as_file("/Users/zooeytsai/排名100.png")
+    elmts=driver.find_elements_by_xpath("//div[@class='yuRUbf']/a")
+
+    clickelmt=None
+    cnt=1
+    datadict={'搜尋詞':[],'結果標題':[],'結果網址':[],'結果名次':[]}
+    
+    for elmt in elmts:
+        try:
+            href=elmt.get_attribute('href')
+            if 'taroboba-yuan.com' in href:
+                clickelmt=elmt
+                logger.debug('[clickbot_100]['+term+']['+str(cnt)+']')
+
+            print(href)
+            print(elmt.text)
+            datadict['搜尋詞'].append(term)
+            datadict['結果標題'].append(elmt.text)
+            datadict['結果網址'].append(href)
+            datadict['結果名次'].append(str(cnt))
+
+            table.insert({'title':elmt.text,'url':href,'keyword':term,'dt':datetime.datetime.now(),'num':cnt})
+            cnt+=1
+        except:
+            print('href2 exception')
+            traceback.print_exc()
+    if clickelmt:
+        webdriver.ActionChains(driver).move_to_element(clickelmt).perform()
+        webdriver.ActionChains(driver).move_to_element(clickelmt).click().perform()
+    if len(datadict['結果標題'])<=0:
+        print('None')
+        driver.quit()
+        sys.exit()
+    df['搜尋詞']=datadict['搜尋詞']
+    df['結果標題']=datadict['結果標題']
+    df['結果網址']=datadict['結果網址']
+    df['結果名次']=datadict['結果名次']
+
+#    df.to_excel('/Users/zooeytsai/'+fname+".xls")
+    df.to_excel('c:/tmp/'+fname+".xls")
+
+    driver.quit()
+
+process_one()
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--loop')
+args = parser.parse_args()
+
+if args.loop:
+
+#    schedule.every(6).minutes.do(process_one)
+    schedule.every(0.4).minutes.do(process_one)
+
+    while True:
+        schedule.run_pending()
+        time.sleep(1)

+ 114 - 0
SEO/pet100.py

@@ -0,0 +1,114 @@
+import random
+import sys
+import dataset
+from selenium import webdriver
+import traceback
+import datetime
+import codecs
+import time
+import urllib
+import argparse
+import schedule
+import logging
+import sys
+from logging.handlers import SysLogHandler
+import socket
+import pandas as pd
+import socket
+import os
+_LOG_SERVER = ('hhh.ptt.cx', 514)
+logger = logging.getLogger('clickbot_100')
+handler1 = SysLogHandler(address=_LOG_SERVER,socktype=socket.SOCK_DGRAM)
+logger.addHandler(handler1)
+#logger.debug('[clickbot_100][清原]begin')
+
+hname=socket.gethostname()
+pid=str(os.getpid())
+logger.fatal('[clickbot_100]['+hname+']['+pid+']begin')
+
+
+def restart_browser():
+    options = webdriver.ChromeOptions()
+    options.add_argument('--headless') 
+    driver=webdriver.Chrome(options=options)
+    driver.set_window_size(950,6000)
+    return driver
+
+
+def process_one():
+    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+    lst=[]
+    table=db['save_result']
+    cursor=db.query('select term from selected_kw where client="毛怪" and term not in (SELECT distinct(keyword) FROM seo.save_result where url like "%beastparadise.net%" and datediff(now(),dt)=0)')
+    for c in cursor:
+        lst.append(c['term'])
+
+    term=random.choice(lst)
+    print(term)
+    logger.debug('[clickbot_100]['+term+']')
+    driver=restart_browser()
+    escaped_search_term=urllib.parse.quote(term)
+    googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(escaped_search_term, 100,'zh-TW')
+
+    print(googleurl)
+    driver.get(googleurl)
+    time.sleep(6)
+    fname=term.replace(' ','_')
+    driver.save_screenshot('c:/tmp/seo/'+fname+'.png')
+    df=pd.DataFrame()
+
+    elmts=driver.find_elements_by_xpath("//div[@class='yuRUbf']/a")
+
+    clickelmt=None
+    cnt=1
+    datadict={'搜尋詞':[],'結果標題':[],'結果網址':[],'結果名次':[]}
+    
+    for elmt in elmts:
+        try:
+            href=elmt.get_attribute('href')
+            if 'taroboba-yuan.com' in href:
+                clickelmt=elmt
+                logger.debug('[clickbot_100]['+term+']['+str(cnt)+']')
+
+            print(href)
+            print(elmt.text)
+            datadict['搜尋詞'].append(term)
+            datadict['結果標題'].append(elmt.text)
+            datadict['結果網址'].append(href)
+            datadict['結果名次'].append(str(cnt))
+
+            table.insert({'title':elmt.text,'url':href,'keyword':term,'dt':datetime.datetime.now(),'num':cnt})
+            cnt+=1
+        except:
+            print('href2 exception')
+            traceback.print_exc()
+    if clickelmt:
+        webdriver.ActionChains(driver).move_to_element(clickelmt).perform()
+        webdriver.ActionChains(driver).move_to_element(clickelmt).click().perform()
+    if len(datadict['結果標題'])<=0:
+        print('None')
+        driver.quit()
+        sys.exit()
+    df['搜尋詞']=datadict['搜尋詞']
+    df['結果標題']=datadict['結果標題']
+    df['結果網址']=datadict['結果網址']
+    df['結果名次']=datadict['結果名次']
+
+    df.to_excel('c:/tmp/seo/'+fname+".xls")
+
+    driver.quit()
+
+process_one()
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--loop')
+args = parser.parse_args()
+
+if args.loop:
+
+#    schedule.every(6).minutes.do(process_one)
+    schedule.every(0.4).minutes.do(process_one)
+
+    while True:
+        schedule.run_pending()
+        time.sleep(1)

+ 149 - 0
SEO/ranking_day.py

@@ -0,0 +1,149 @@
+import sys
+import dataset
+from selenium import webdriver
+import traceback
+import datetime
+import codecs
+import time
+import urllib
+import argparse
+import logging
+import sys
+from logging.handlers import SysLogHandler
+import socket
+import pandas as pd
+import random
+from selenium.webdriver.common.by import By
+from selenium.webdriver.chrome.service import Service
+import os
+from random import randint
+import pymysql
+pymysql.install_as_MySQLdb()
+
+
+path = 'C:\portable\chromedriver'
+path_z = '/Users/zooeytsai/Downloads/chromedriver 2'
+driver = None
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+lst = []
+table = db['google_rank']
+
+
+def rua():
+    pool = [
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:73.0) Gecko/20100101 Firefox/73.0",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 OPR/68.0.3618.125",
+    ]
+    return random.choice(pool)
+
+
+def process_one(item):
+    global driver
+    
+    term = item[0]
+    domain = item[1]
+    print(term, domain)
+    
+    escaped_search_term = urllib.parse.quote(term)
+    googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(escaped_search_term, 100, 'zh-TW')
+    print(googleurl)
+    driver.get(googleurl)
+    time.sleep(6)
+    # fname=term.replace(' ','_')
+    # driver.save_screenshot('c:/tmp/seo/'+fname+'.png')
+    # df=pd.DataFrame()
+    
+    elmts = driver.find_elements(By.XPATH,"//div[@class='yuRUbf']/a")
+    cnt = 1
+    datadict = {'搜尋詞': [], '結果標題': [], '結果網址': [], '結果名次': []}
+    
+    if len(elmts) == 0:
+        print('chrome異常')
+        os.chdir('/root')
+        os.system('python3 reboot.py')
+    
+    for elmt in elmts:
+        try:
+            href = elmt.get_attribute('href')
+            
+            datadict['搜尋詞'].append(term)
+            datadict['結果標題'].append(elmt.text)
+            datadict['結果網址'].append(href)
+            datadict['結果名次'].append(str(cnt))
+            if domain in href:
+                print(href)
+                print(elmt.text)
+                table.insert(
+                    {'title': elmt.text, 'url': href, 'keyword': term, 'dt': datetime.datetime.now(), 'num': cnt})
+            cnt += 1
+        except:
+            print('href2 exception')
+            traceback.print_exc()
+    if len(datadict['結果標題']) <= 0:
+        print('None')
+        driver.quit()
+        sys.exit()
+    # df['搜尋詞']=datadict['搜尋詞']
+    # df['結果標題']=datadict['結果標題']
+    # df['結果網址']=datadict['結果網址']
+    # df['結果名次']=datadict['結果名次']
+    #
+    # df.to_excel('/Users/zooeytsai/'+fname+".xls")
+    
+    driver.quit()
+    print('中場休息')
+    time.sleep(randint(90, 120))
+
+
+def run_once(pport, item):
+    global driver
+    result = []
+    s = Service('/root/driver/chromedriver')
+    user_agent = rua()
+    options = webdriver.ChromeOptions()
+    options.add_argument('--headless')
+    options.add_argument('--remote-debugging-port=9222')
+    options.add_experimental_option("debuggerAddress", f"127.0.0.1:{pport}")
+    options.add_argument("--user-agent=" + user_agent)
+    options.add_argument("--incognito")
+    
+    driver = webdriver.Chrome(options=options, service=s)
+    
+    driver.delete_all_cookies()
+    driver.set_window_size(1400, 1000)
+    
+    process_one(item)
+    time.sleep(3)
+    driver.quit()
+
+
+cursor = db.query('select term,domain from seo.selected_kw')
+for c in cursor:
+    lst.append([c['term'], c['domain']])
+
+    
+for i in lst:
+    print('這裡', i)
+    while True:
+        try:
+            os.system('docker container restart tiny9')
+            time.sleep(1)
+            run_once(9928, i)
+            print('docker開啟完成')
+            cur = db.query('select * from seo.google_rank order by id  desc limit 1')
+            for c in cur:
+                kw = c['keyword']
+            if kw != i[0]:
+                print('稍等,上一筆待完成')
+                time.sleep(60)
+            break
+        except:
+            os.system('docker container restart tiny9')
+            time.sleep(15)
+    print('等待進行下一個關鍵字')
+    time.sleep(5)

+ 139 - 0
SEO/ranking_day2.py

@@ -0,0 +1,139 @@
+from random import randint
+import sys
+import dataset
+from selenium import webdriver
+import traceback
+import datetime
+import codecs
+import time
+import urllib
+import argparse
+import logging
+import sys
+from logging.handlers import SysLogHandler
+import socket
+import pandas as pd
+#import pymysql
+#pymysql.install_as_MySQLdb()
+import random
+from selenium.webdriver.common.by import By
+from selenium.webdriver.chrome.service import Service
+from selenium.webdriver.support.ui import WebDriverWait
+import os
+import fire
+
+path = 'C:\portable\chromedriver'
+path_z = '/Users/zooeytsai/Downloads/chromedriver 2'
+driver = None
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+lst = []
+table = db['google_rank']
+
+
+def rua():
+    pool = [
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:73.0) Gecko/20100101 Firefox/73.0",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 OPR/68.0.3618.125",
+    ]
+    return random.choice(pool)
+
+
+def process_one(item):
+    global driver
+    term = item[0]
+    domain = item[1]
+    print(term, domain)
+    
+    escaped_search_term = urllib.parse.quote(term)
+    googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(escaped_search_term, 100, 'zh-TW')
+    print(googleurl)
+    driver.get(googleurl)
+    time.sleep(6)
+    # fname=term.replace(' ','_')
+    # driver.save_screenshot('c:/tmp/seo/'+fname+'.png')
+    # df=pd.DataFrame()
+    
+    elmts = driver.find_elements(By.XPATH,"//div[@class='yuRUbf']/a")
+    cnt = 1
+    clickelmt=None
+    datadict = {'搜尋詞': [], '結果標題': [], '結果網址': [], '結果名次': []}
+    print('搜尋結果',len(elmts))
+    for elmt in elmts:
+        try:
+            href = elmt.get_attribute('href')
+            
+            datadict['搜尋詞'].append(term)
+            datadict['結果標題'].append(elmt.text)
+            datadict['結果網址'].append(href)
+            datadict['結果名次'].append(str(cnt))
+            if domain in href:
+                clickelmt = elmt
+                print(href)
+                print(elmt.text)
+                table.insert(
+                    {'kw': term, 'domain': domain, 'ranking': cnt, 'title': elmt.text, 'url': href,'dt': datetime.datetime.now()})
+#                webdriver.ActionChains(driver).move_to_element(elmt).perform()
+#                webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+            cnt += 1
+        except:
+            print('href2 exception')
+            traceback.print_exc()
+    if clickelmt:
+        webdriver.ActionChains(driver).move_to_element(clickelmt).perform()
+        webdriver.ActionChains(driver).move_to_element(clickelmt).click().perform()
+        time.sleep(5)
+        print('點擊完成')
+    if len(datadict['結果標題']) <= 0:
+        print('None')
+        driver.quit()
+        sys.exit()
+    # df['搜尋詞']=datadict['搜尋詞']
+    # df['結果標題']=datadict['結果標題']
+    # df['結果網址']=datadict['結果網址']
+    # df['結果名次']=datadict['結果名次']
+    #
+    # df.to_excel('/Users/zooeytsai/'+fname+".xls")
+    
+    driver.quit()
+    print('結束')
+
+
+
+def run_once(q):
+    global driver
+    result = []
+    s = Service('/root/driver/chromedriver')
+    user_agent = rua()
+    options = webdriver.ChromeOptions()
+    options.add_argument('--headless')
+    options.add_argument('--remote-debugging-port=9222')
+    options.add_experimental_option("debuggerAddress", f"127.0.0.1:{q[2]}")
+    options.add_argument("--user-agent=" + user_agent)
+    options.add_argument("--incognito")
+    
+    driver = webdriver.Chrome(options=options, service=s)
+    
+    driver.delete_all_cookies()
+    driver.set_window_size(1400, 1000)
+    
+    process_one(q)
+    time.sleep(3)
+    driver.quit()
+
+
+class JParams(object):
+    
+    def get(self, kw, domain, port):
+        print(kw)
+        print(domain)
+        run_once((kw, domain, port))
+
+
+if __name__ == '__main__':
+    fire.Fire(JParams)
+

+ 64 - 0
SEO/run_gsrack.py

@@ -0,0 +1,64 @@
+import sys
+import codecs
+import traceback
+import requests
+import re
+import pandas as pd
+import random
+import urllib
+import json
+import gspread
+import datetime
+from gspread_pandas import Spread, Client
+from oauth2client.service_account import ServiceAccountCredentials
+import os
+import redis
+import time
+import fire
+import dataset
+import pymysql
+pymysql.install_as_MySQLdb()
+
+
+def run_once(pport, dockername):
+    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+    lst = []
+    
+    cursor = db.query('SELECT * FROM seo.selected_kw where client="神助物流"')
+    for c in cursor:
+        lst.append([c['kw']])
+    
+    obj = random.choice(lst)
+    print(obj)
+    kw = obj[0]
+    domain = 'hhh.com.tw'
+    print(kw, domain)
+    
+    s = f'python3 general_gsrack.py get --kw="{kw}" --domain="{domain}" --port={str(pport)}'
+    
+    intval = os.system(f'python3 general_gsrack.py get --kw="{kw}" --domain="{domain}" --port="{str(pport)}"')
+    
+    print('執行genetal_gsrack')
+    
+    if intval == -1:
+        print('-1')
+        sys.exit()
+
+
+class JParams(object):
+    
+    def get(self, port=9222):
+        while True:
+            try:
+                os.system('docker container restart tiny9')
+                time.sleep(10)
+                run_once(9928, 'tiny9')
+            
+            except:
+                os.system('docker container restart tiny9')
+                time.sleep(15)
+
+
+if __name__ == '__main__':
+    fire.Fire(JParams)
+

+ 58 - 0
SEO/run_ranking_day.py

@@ -0,0 +1,58 @@
+import sys
+import random
+import os
+import time
+import fire
+import dataset
+from random import randint
+#import pymysql
+#pymysql.install_as_MySQLdb()
+
+def run_once(pport, dockername):
+    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+    lst = []
+    
+    cursor = db.query('select term,domain from seo.selected_kw')
+    for c in cursor:
+        lst.append([c['term'], c['domain']])
+
+    cur = db.query('select * from seo.google_rank order by id  desc limit 1')
+    count_row = db.query('select count(*) from seo.google_rank where CAST(dt AS DATE) = CAST( curdate() AS DATE)')
+    for c in cur:
+        kw = c['keyword']
+    for c in count_row:
+        now_day_len = c['count(*)']
+        print(now_day_len)
+    if now_day_len == 0:
+        print('首位')
+        intval = os.system(
+            f'python3 ranking_day2.py get --kw="清原 中央" --domain="taroboba-yuan.com" --port="{str(pport)}"')
+    
+    for i in lst:
+        if i[0] == kw:
+            id = lst.index(i)
+    intval = os.system(f'python3 ranking_day2.py get --kw="{lst[id+1][0]}" --domain="{lst[id+1][1]}" --port="{str(pport)}"')
+
+    if intval == -1:
+        print('-1')
+        sys.exit()
+    print('執行完成ranking_day2.py')
+
+
+class JParams(object):
+    
+    def get(self, port=9222):
+        while True:
+            try:
+                os.system('docker container restart tiny8')
+                time.sleep(1)
+                run_once(9929,'tiny8')
+                time.sleep(20)
+                break
+            except:
+                os.system('docker container restart tiny9')
+                time.sleep(15)
+
+if __name__ == '__main__':
+    fire.Fire(JParams)
+

+ 8 - 0
SEO/seo_notify.py

@@ -20,7 +20,11 @@ import sys
 import docker
 import pymysql
 pymysql.install_as_MySQLdb()
+<<<<<<< HEAD
 from ga_click import main
+=======
+from monitor.ga_click import main
+>>>>>>> 604b4f0737fe7f055de28e84a0a4697a346701cf
 
 headers = {
     "Authorization": "Bearer " + "OZDcq7sVKwr3F6YNLtBF3LuIgpa4Ql9eAnBWeD7sHTJ",
@@ -60,8 +64,12 @@ per_total_click = f"毛怪:{d['beastparadise.net']}\n清原:{d['taroboba-yuan.co
 print(per_total_click)
 msg_per_total_click = '\n點擊次數總計:\n' + per_total_click
 msg = "\n機器人執行次數:" + str(idx)
+<<<<<<< HEAD
 ga = main()['ga:sessions']
 ga_sessions = '\n幸福空間的GA Sessions'+ ga
+=======
+ga_sessions = '\n幸福空間的GA Sessions'+main()['ga:sessions']
+>>>>>>> 604b4f0737fe7f055de28e84a0a4697a346701cf
 send_msg(msg_per_total_click + result + ga_sessions)
 
 

+ 128 - 0
SEO/tiny_click100.py

@@ -0,0 +1,128 @@
+import random
+import sys
+import dataset
+from selenium import webdriver
+import traceback
+import datetime
+import codecs
+import time
+import urllib
+import argparse
+import schedule
+import logging
+import sys
+from logging.handlers import SysLogHandler
+import socket
+import pandas as pd
+import socket
+import os
+_LOG_SERVER = ('hhh.ptt.cx', 514)
+logger = logging.getLogger('clickbot_100')
+handler1 = SysLogHandler(address=_LOG_SERVER,socktype=socket.SOCK_DGRAM)
+logger.addHandler(handler1)
+#logger.debug('[clickbot_100][清原]begin')
+
+hname=socket.gethostname()
+pid=str(os.getpid())
+logger.fatal('[clickbot_100]['+hname+']['+pid+']begin')
+
+
+def restart_browser():
+    os.system('docker container restart headless-shell')
+    time.sleep(9)
+    chrome_options = webdriver.ChromeOptions()
+#    chrome_options.add_argument("--incognito")
+    chrome_options.add_argument("--headless")
+    chrome_options.add_argument("--no-sandbox")
+    chrome_options.add_argument("--disable-dev-shm-usage")
+    chrome_options.add_argument("start-maximized")
+    chrome_options.add_argument("user-data-dir=/tmp")
+    chrome_options.debugger_address="127.0.0.1:9222"
+    chrome98=r'C:\portable\webdriver\chrome98\chromedriver.exe'
+#    chrome98=r'/root/drivers/98/chromedriver'
+
+    driver = webdriver.Chrome(chrome_options=chrome_options,executable_path=chrome98)
+ 
+
+    return driver
+
+
+def process_one():
+    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+    lst=[]
+    table=db['save_result']
+    cursor=db.query('select term from selected_kw where client="清原" and term not in (SELECT distinct(keyword) FROM seo.save_result where url like "%taroboba-yuan.com%" and datediff(now(),dt)=0)')
+    for c in cursor:
+        lst.append(c['term'])
+
+    term=random.choice(lst)
+    print(term)
+    logger.debug('[clickbot_100]['+term+']')
+    driver=restart_browser()
+    escaped_search_term=urllib.parse.quote(term)
+    googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(escaped_search_term, 100,'zh-TW')
+
+    print(googleurl)
+    driver.get(googleurl)
+    time.sleep(6)
+    driver.save_screenshot('c:/tmp/test.png')
+
+    fname=term.replace(' ','_')
+    driver.save_screenshot('c:/tmp/seo/'+fname+'.png')
+    df=pd.DataFrame()
+
+    elmts=driver.find_elements_by_xpath("//div[@class='yuRUbf']/a")
+
+    clickelmt=None
+    cnt=1
+    datadict={'搜尋詞':[],'結果標題':[],'結果網址':[],'結果名次':[]}
+    
+    for elmt in elmts:
+        try:
+            href=elmt.get_attribute('href')
+            if 'taroboba-yuan.com' in href:
+                clickelmt=elmt
+                logger.debug('[clickbot_100]['+term+']['+str(cnt)+']')
+
+            print(href)
+            print(elmt.text)
+            datadict['搜尋詞'].append(term)
+            datadict['結果標題'].append(elmt.text)
+            datadict['結果網址'].append(href)
+            datadict['結果名次'].append(str(cnt))
+
+            table.insert({'title':elmt.text,'url':href,'keyword':term,'dt':datetime.datetime.now(),'num':cnt})
+            cnt+=1
+        except:
+            print('href2 exception')
+            traceback.print_exc()
+    if clickelmt:
+        webdriver.ActionChains(driver).move_to_element(clickelmt).perform()
+        webdriver.ActionChains(driver).move_to_element(clickelmt).click().perform()
+    if len(datadict['結果標題'])<=0:
+        print('None')
+        driver.quit()
+        sys.exit()
+    df['搜尋詞']=datadict['搜尋詞']
+    df['結果標題']=datadict['結果標題']
+    df['結果網址']=datadict['結果網址']
+    df['結果名次']=datadict['結果名次']
+
+    df.to_excel('c:/tmp/seo/'+fname+".xls")
+
+    driver.quit()
+
+process_one()
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--loop')
+args = parser.parse_args()
+
+if args.loop:
+
+    schedule.every(6).minutes.do(process_one)
+#    schedule.every(0.4).minutes.do(process_one)
+
+    while True:
+        schedule.run_pending()
+        time.sleep(1)

Diferenças do arquivo suprimidas por serem muito extensas
+ 130 - 0
SEO關聯圖_網址關鍵字.html


BIN
choozmo/__pycache__/gsearch_general.cpython-310.pyc


BIN
choozmo/__pycache__/gsearch_general.cpython-39.pyc


+ 108 - 0
choozmo/click_commerce.py

@@ -0,0 +1,108 @@
+import time
+import json
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+import os
+import urllib.parse
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+import codecs
+import random
+import requests
+import dataset
+import time
+import traceback
+import sys
+
driver=None  # shared Selenium driver; created per job in run_once()
# NOTE(review): LINE Notify bearer token is committed in plain text — rotate
# and move to configuration.
headers = {
        "Authorization": "Bearer " + "t35vhZtWNgvDNWHc3DJh0OKll3mcB9GvC8K2EAkBug2",
        "Content-Type": "application/x-www-form-urlencoded"
}
+
+
+
def send_msg(kw):
    """Push a LINE Notify message announcing the keyword being processed."""
    payload = {"message": "處理關鍵字: " + kw}
    requests.post(
        "https://notify-api.line.me/api/notify",
        headers=headers,
        params=payload,
    )
+
+
def empty_query(q):
    """Run a bare Google search for ``q`` in the shared browser session."""
    global driver
    url = 'https://www.google.com/search?q=' + urllib.parse.quote(q)
    driver.get(url)
    time.sleep(3)
+
+
def process_query(qs):
    """Search Google for a keyword and click the first matching result.

    Args:
        qs: tuple of (keyword, domain-substring). The first organic result
            whose href contains the domain (and whose visible text is
            non-trivial) is scrolled to and clicked.

    Side effects: navigates the shared global ``driver``.
    (Removed dead locals ``idx``/``ranking`` that were assigned but never used.)
    """
    q = qs[0]
    domain = qs[1]
    global driver
    googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(urllib.parse.quote(q), 100, 'zh-TW')
    print(googleurl)
    driver.get(googleurl)
    time.sleep(6)

    elmts = driver.find_elements_by_xpath("//div[@class='yuRUbf']/a")
    print(len(elmts))

    for elmt in elmts:
        href = elmt.get_attribute('href')
        txt = elmt.text
        # Skip anchors with little visible text (ads / snippet artifacts).
        if len(txt) > 10:
            if domain in href:
                print('clicked....')
                print(href)
                print(txt)
                # Scroll into view first, then click, to mimic a real user.
                webdriver.ActionChains(driver).move_to_element(elmt).perform()
                webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
                break
+
+
def run_once(q):
    """Launch a fresh headless Chrome, run one click job, then tear down.

    Args:
        q: (keyword, domain) tuple forwarded to process_query().

    (Removed unused local ``result``.)
    """
    global driver
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
#    options.add_argument("--user-agent=" +user_agent)
    options.add_argument("--incognito")  # fresh profile so clicks are not personalized

    driver = webdriver.Chrome(
    options=options)

    driver.delete_all_cookies()
    driver.set_window_size(1400,1000)

    print(q)
    process_query(q)
    time.sleep(3)
    driver.quit()
+
+
#lst=[{'kw':'幸福空間','domain':'hhh.com.tw','page':0}]
lst=[]
# NOTE(review): database credentials are hard-coded; move to config/env.
db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')

# Load all "hhh-faq" click jobs in random order.
cursor=db.query('SELECT kw as term,domain FROM seo.seo_clickjobs where category="hhh-faq"  order by rand()')

for c in cursor:
    lst.append(c)


#for c in lst:
# Endless click loop: pick a random job, run it in a fresh browser, then wait
# a randomized 380-520 s so the traffic pattern does not look scripted.
while True:
    try:
        c=random.choice(lst)
        run_once( (c['term'],c['domain'])   )
    except:
        # best-effort: log and continue with the next random job
        traceback.print_exc()
    sleepint=random.randint(380,520)
    time.sleep(sleepint)
+

+ 8 - 0
choozmo/cryptotest.py

@@ -0,0 +1,8 @@
# One-off helper: print a bcrypt hash of the password '123456' using the same
# CryptContext settings as the auth service.
from passlib.context import CryptContext
# NOTE(review): hard-coded signing secret committed to the repo — rotate it.
SECRET_KEY = "df2f77bd544240801a048bd4293afd8eeb7fff3cb7050e42c791db4b83ebadcd"
ALGORITHM = "HS256"
ACCESS_TOKEN_EXPIRE_DAYS = 5
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")

print(pwd_context.hash('123456'))
+

+ 43 - 0
choozmo/custom_profile_test.py

@@ -0,0 +1,43 @@
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+import os
+import datetime
+import urllib.parse
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+import codecs
+import random
+from bs4 import BeautifulSoup
+import requests
+import time
+import rpyc
+import sys
+import docker
+import  googlesearch
+import codecs
+import sys
+import time
+import dataset
+import os
+
# Attach to a remote Selenium hub using a persistent Chrome user-data dir so a
# specific signed-in browser profile is reused across runs.
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_argument('user-data-dir=/opt/user')  # shared profile store on the grid host
#options.add_argument('--profile-directory=Default')
#options.add_argument('--profile-directory=Profile 1')
#options.add_argument('--profile-directory=Profile 2')
#options.add_argument('--profile-directory=Profile 3')
#options.add_argument('--profile-directory=Profile 4')
#options.add_argument('--profile-directory=Profile 5')
#options.add_argument('--profile-directory=Profile 7')
options.add_argument('--profile-directory=Profile 8')  # the profile under test


driver = webdriver.Remote(
        command_executor='http://127.0.0.1:4444/wd/hub',
    desired_capabilities=options.to_capabilities())
driver.get('https://www.yahoo.com.tw')
# Park the session so the profile can be inspected manually.
time.sleep(99999)
#driver.set_window_size(1400,1000)

+ 1 - 0
choozmo/db_clickjob.py

@@ -18,6 +18,7 @@ import time
 import rpyc
 import sys
 import docker
+import json
 
 driver=None
 headers = {

+ 101 - 0
choozmo/fb_get_email.py

@@ -0,0 +1,101 @@
+import traceback
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+import os
+import datetime
+import urllib.parse
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+import codecs
+import random
+from bs4 import BeautifulSoup
+import requests
+import time
+import rpyc
+import sys
+import docker
+import  googlesearch
+import codecs
+import sys
+import time
+import dataset
+import os
+
def process_one(driver):
    """Collect {'title', 'url'} dicts from the organic results on the current SERP."""
    results = []
    for anchor in driver.find_elements_by_xpath("//div[@class='yuRUbf']/a"):
        try:
            link = anchor.get_attribute('href')
            first_line = anchor.text.split('\n')[0]
            print(first_line)
            results.append({'title': first_line, 'url': link})
        except:
            print('href2 exception')
            traceback.print_exc()
    return results
+
def process_query(driver,url):
    """Open a Facebook page URL and try to scrape its public contact e-mail.

    Follows the '關於' (About) link on the page, then looks for a mailto:
    anchor there. Returns the e-mail text, or None when not found.
    """
    driver.get(url)
    time.sleep(4)
    totallst=[]  # unused; left over from an earlier pagination version below
#    elmt=driver.find_element_by_xpath("//span[contains(text(),'關於')]/../span[contains(text(),'查看全部')]/a")
    try:
        elmt=driver.find_element_by_xpath("//span[contains(text(),'關於')]/..//a")
        print(elmt)
        print(elmt.text)
        href=elmt.get_attribute('href')
        driver.get(href)
        time.sleep(5)
        elmt=driver.find_element_by_xpath("//a[contains(@href,'mailto')]")
        print(elmt.text)
        print(elmt.get_attribute('href'))
        return elmt.text
#        time.sleep(3)
    except:
        # Any lookup failure (layout change, no e-mail listed) is treated as
        # "no e-mail for this page".
        print('not found')
        return None    
#    time.sleep(9999)
#        try:
#            elmt=driver.find_element_by_xpath("//a[@id='pnnext']")
#        except:
#            traceback.print_exc()
#            print('pnnext exception')
#            break
#        time.sleep(1.5)
#    return totallst
+
+
result=[]  # NOTE(review): appears unused in this module
driver=None  # shared Selenium session, created by restart_browser()
+
def restart_browser():
    """Start a local Chrome instance sized for desktop rendering and return it."""
    opts = webdriver.ChromeOptions()
    browser = webdriver.Chrome(desired_capabilities=opts.to_capabilities())
    browser.set_window_size(1400, 1000)
    return browser
+
# Walk previously scraped Facebook result URLs in random order and record the
# e-mail found (or None) for each into fb_progress.
db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
cursor=db.query('select title,url from fb_gsearch order by rand()')
lst=[]
for c in cursor:
    lst.append(c)
    
table=db['fb_progress']
driver=restart_browser()
for c in lst:
    email=process_query(driver,c['url'])
    table.insert({'url':c['url'],'email':email})
    time.sleep(3)

+ 21 - 0
choozmo/fb_gsearch.py

@@ -0,0 +1,21 @@
import gsearch_general
import sys
import dataset
import datetime

# Scrape Google for Facebook pages matching the query and archive each hit
# (with a timestamp) into seo.fb_gsearch.
db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
table = db['fb_gsearch']


driver = gsearch_general.restart_browser()
idx = gsearch_general.process_query(driver, 'site:facebook.com pages 道路救援', number_results=100, language_code='zh-TW')
print(idx)

# BUG FIX: the None/ban check must run *before* iterating. The original code
# did `for x in idx:` first, which raised TypeError when process_query
# returned None, so the ban-detection branch was unreachable.
if idx is None:
    print(driver.page_source)
    if '我們的系統偵測到您的電腦網路送出的流量有異常情況' in driver.page_source:
        print('baned.....')
        sys.exit()
else:
    for x in idx:
        x['dt'] = datetime.datetime.now()
        table.insert(x)

+ 42 - 0
choozmo/fetch_content.py

@@ -0,0 +1,42 @@
+
# Fetch pages, strip them to plain text, segment with jieba, and count word
# frequencies in a scratch SQLite table.
from bs4 import BeautifulSoup
import requests
import html2text
import jieba
import dataset

jieba.load_userdict("c:/tmp/userdict.txt")
# Characters/fragments to drop from the token stream (punctuation, markup noise).
# NOTE(review): `word not in stopwords` below is a *substring* test against
# this string, so only words that appear contiguously in it are filtered —
# confirm this is intended rather than a set of stopwords.
stopwords=', 的/-。*.|)(][_!、」「::jpgmenu有了也gif%stylespnghttpsimagesicogovRSSscript'
db = dataset.connect('sqlite:///c:/tmp/jieba.db')
db.query('delete from tmp')
#db.query('drop table tmp')

urls=['https://www.nightnight.tw/%E5%BA%8A%E5%A2%8A%E6%8E%A8%E8%96%A6/']


#db = dataset.connect('sqlite:///:memory:')
table=db['tmp']

# request web page
#resp = requests.get("https://casino543.com/2021%E5%B9%B4%E5%8D%81%E5%A4%A7%E7%B7%9A%E4%B8%8A%E5%A8%9B%E6%A8%82%E5%9F%8E%E6%8E%92%E5%90%8D%E6%8E%A8%E8%96%A6-%E5%A8%9B%E6%A8%82%E5%9F%8E%E5%89%8D100%E5%90%8D%E5%A8%9B%E6%A8%82%E5%9F%8E%E9%82%84/")
#resp = requests.get("https://mort.moi.gov.tw/frontsite/cms/newsAction.do?method=viewContentDetail&iscancel=true&contentId=MjU3NA==")
#resp = requests.get("https://www.memory.com.tw/funeral_ceremony-in.php?i=5&c=3")
for url in urls:
    resp = requests.get(url)
    html = resp.content
    html=html.decode('utf-8')
    h = html2text.HTML2Text()

    h.ignore_links = True  # keep only readable text, no link targets

    docs=h.handle(html )
    words = jieba.cut(docs, cut_all=False)
    for word in words:
        if word not in stopwords:
            table.insert({'word':word})


# Report every token that occurs more than twice, most frequent first.
cursor=db.query('select word,count(word) as cnt from tmp group by word having count(word) >2 order by count(word) desc')
for c in cursor:
    print(c['word'])
    print(c['cnt'])

+ 164 - 0
choozmo/fetch_serp.py

@@ -0,0 +1,164 @@
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+import os
+import datetime
+import urllib.parse
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+import codecs
+import random
+from bs4 import BeautifulSoup
+import requests
+import time
+import rpyc
+import sys
+import docker
+import  googlesearch
+import codecs
+import sys
+import time
+import dataset
+import os
+
+
# Module setup: DB handle, keyword list from a local file, and LINE headers.
db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/hhh?charset=utf8mb4')

#cursor=db.query('SELECT kw FROM hhh.hhh_contentgap_serp where ranking is not null;')
#cursor=db.query('SELECT kw FROM hhh.hhh_contentgap_serp where kw not in (select distinct kw from hhh_contentgap_serp where id >= 155)')

kwlst={}  # previously-processed keywords; population is currently disabled
#for c in cursor:
#    kwlst[c['kw']]=1




# Rankings are appended to this table by the main loop below.
table=db['hhh_contentgap_serp']
curdir=os.path.realpath('.')

#fr=codecs.open(curdir+os.sep+'contentgap.txt','r','utf-8')
#fr=codecs.open(curdir+os.sep+'hhh\\seo\\contentgap.txt','r','utf-8')
fr=codecs.open('C:\\gitlab\\kw_tools\\kw_tools\\hhh\\SEO\\contentgap.txt','r','utf-8')
lines=fr.readlines()
lst=[]
for l in lines:
    lst.append(l.replace('\n',''))




# NOTE(review): LINE Notify bearer token committed in plain text.
headers = {
        "Authorization": "Bearer " + "t35vhZtWNgvDNWHc3DJh0OKll3mcB9GvC8K2EAkBug2",
        "Content-Type": "application/x-www-form-urlencoded"
}
+
def send_msg(kw):
    """Notify via LINE which keyword is being processed."""
    r = requests.post(
        "https://notify-api.line.me/api/notify",
        headers=headers,
        params={"message": "處理關鍵字: " + kw},
    )
+
+
def empty_query(q):
    """Issue a plain Google query for ``q`` and give the page time to render."""
    global driver
    driver.get('https://www.google.com/search?q=' + urllib.parse.quote(q))
    time.sleep(3)
+
+
def process_query(qs,number_results=10,language_code='en',pat='hhh.com.tw'):
    """Return the 0-based SERP position of ``pat``, or None if page 2 fails.

    NOTE(review): ``qs``, ``number_results`` and ``language_code`` are
    currently ignored — the search URL is hard-coded to 'site:hsinfei.com'.
    Looks like a temporary debug change; confirm before relying on this.
    Scans result page 1, clicks through to page 2 if needed; if ``pat`` is
    still not found the function parks in a 9999 s sleep (debug aid) instead
    of returning.
    """
    global driver
    escaped_search_term=urllib.parse.quote(qs)  # computed but unused (see note)
#    escaped_search_term = qs.replace(' ', '+')
#    googleurl='https://www.google.com/search?q='+
    googleurl='https://www.google.com/search?q=site:hsinfei.com'

    driver.get(googleurl)
    time.sleep(3)
    print(driver.page_source)
    elmts=driver.find_elements_by_xpath("//div[@class='yuRUbf']/a")
    print(elmts)
    idx=0
    for elmt in elmts:
        try:
            href=elmt.get_attribute('href')
            print(str(idx)+': '+href)
            if pat in href:
                return idx
            idx+=1
        except:
            print('href exception')

    # Not on page 1 — click Google's "next" control to load page 2.
    try:
        elmt=driver.find_element_by_xpath("//a[@id='pnnext']")
        webdriver.ActionChains(driver).move_to_element(elmt).perform()
        webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
    except:
        print('pnnext exception')
        return None

    time.sleep(4)
    elmts=driver.find_elements_by_xpath("//div[@class='yuRUbf']/a")

    # idx keeps counting from page 1, so the return value is a global rank.
    for elmt in elmts:
        try:
            href=elmt.get_attribute('href')
            print(str(idx)+': '+href)
            if pat in href:
                return idx
            idx+=1

        except:
            print('href2 exception')

    time.sleep(9999)
+
result=[]  # NOTE(review): appears unused in this module
driver=None  # shared Selenium session, set by restart_browser()
+
def restart_browser():
    """Restart the dockerized Selenium node and return a fresh Remote driver.

    Restarting container 'p4444' gives every query a clean browser (new IP
    fingerprint/cookies), which reduces CAPTCHA bans between searches.
    """
    os.system('docker container restart p4444')
#    client = docker.from_env()
#    ls=client.containers.list()
#    print(ls)
#    ls[0].restart()
    time.sleep(10)  # give the container time to come back up

    options = webdriver.ChromeOptions()
#    options.add_argument("--proxy-server=https://47.241.72.41:80")

    #driver=webdriver.Chrome(desired_capabilities=options.to_capabilities())
    driver = webdriver.Remote(
        command_executor='http://127.0.0.1:4444/wd/hub',
#        command_executor='http://172.104.93.163:4444/wd/hub', 
    #command_executor='http://dev2.choozmo.com:14444/wd/hub',
    desired_capabilities=options.to_capabilities())
#    desired_capabilities=DesiredCapabilities.CHROME)
    driver.set_window_size(1400,1000)
    print('driver ready')
    return driver
+
+
# Main loop: for every keyword from contentgap.txt, restart the browser (fresh
# session per query) and record the hhh.com.tw ranking into the DB.
for l in lst:
#for l in lst[21:]:

#for l in lst[32:]:
#for l in lst[42:]:

    if True:
#    if kwlst.get(l) is None:
        driver=restart_browser()

    #    l='房間 油漆'
    #    idx=process_query(,number_results=100,language_code='zh-TW',pat='hhh.com.tw')
        idx=process_query(l,number_results=100,language_code='zh-TW',pat='hhh.com.tw')
#        if idx is None:
#            sys.exit()
        table.insert({'kw':l,'ranking':idx,'dt':datetime.datetime.now()})
        print({'kw':l,'ranking':idx})
        db.commit()
    #    time.sleep(9999)

#        time.sleep(4)

+ 17 - 0
choozmo/gen_counter_db.py

@@ -0,0 +1,17 @@
import time
import dataset
import os
import codecs

# Seed hhh.kw_contentgap with one row per keyword from the contentgap list.
db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/hhh?charset=utf8mb4')
table = db['kw_contentgap']

# FIX: use a context manager so the file handle is always closed (the
# original leaked it); also dropped the unused `lst` accumulator.
with codecs.open('C:\\gitlab\\kw_tools\\kw_tools\\hhh\\SEO\\contentgap.txt', 'r', 'utf-8') as fr:
    for l in fr.readlines():
        table.insert({'kw': l.replace('\n', '')})
db.commit()


#table=db['counter']
#table.insert({'cnt':0})

+ 356 - 0
choozmo/gen_seo.py

@@ -0,0 +1,356 @@
+#import redis
+import time
+import traceback
+#import json
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+import os
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+import dataset
+from selenium.webdriver.common.keys import Keys
+import json
+import random
+import time
+import redis
+import sys
+import codecs
+import random
+import os
+import time
+from userAgentRandomizer import userAgents
+import requests
driver=None  # shared Selenium handle, (re)created by re_get_webdriver()

db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')

# NOTE(review): LINE Notify token hard-coded; move to configuration.
headers = {
        "Authorization": "Bearer " + "6SDULL1Ebklduc6TFxa97AFto5Sj21kyJ30CxiLiSoi",
        "Content-Type": "application/x-www-form-urlencoded"
}
+
def send_msg(kw):
    """Forward ``kw`` verbatim as a LINE Notify message."""
    requests.post(
        "https://notify-api.line.me/api/notify",
        headers=headers,
        params={"message": kw},
    )
+
+
+
+
def re_get_webdriver():
    """Tear down any existing global driver and start a fresh headless Chrome.

    Sets the module-level ``driver``. On Chrome startup failure the whole
    process is killed (os.kill on own pid) so an external supervisor can
    restart the script cleanly; on other errors ``driver`` is left as None.
    """
    global driver
    result=[]  # unused
    if driver is not None:
        print('closing....')
        driver.quit()
        # Belt-and-braces: kill stray chrome processes left by a wedged quit.
        os.system('killall chrome')
        print('quit....')
        driver=None
    try:
        ua = userAgents()

        user_agent = ua.random()        

        options = webdriver.ChromeOptions()
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")
        options.add_argument("--headless")

        print(user_agent)
#        options.add_argument("--user-agent=" +user_agent)
        options.add_argument("--incognito")
        driver=None
        try:
            driver = webdriver.Chrome(options=options)
        except:
            # Chrome would not start — kill our own process so the
            # supervisor restarts everything from scratch.
#            driver.quit()
#            os.system('pkill -f ')
            os.system('kill %d' % os.getpid())
            sys.exit()
            return
        driver.set_window_size(1400,1000)
        return
    except:
        import traceback
        traceback.print_exc()
        driver=None
        return None
+
+
+
def from_shopping(kw):
    """Warm-up path: reach Google via Google Shopping before the real search.

    NOTE(review): the ``kw`` argument is ignored — the hard-coded string
    '幸福空間' is typed instead; looks like a copy/paste leftover, confirm.
    """
    global driver
    driver.get('https://shopping.google.com')
    time.sleep(5)
    elmt = driver.find_element(By.XPATH, "//input[@id='REsRA']")
    elmt.send_keys('幸福空間') 
    elmt.send_keys(Keys.ENTER) #hits space
    time.sleep(7)
    # Switch from the Shopping tab to the "All" results tab.
    elmt = driver.find_element(By.XPATH, "//div[@class='hdtb-mitem']/a[contains(text(),'全部') or contains(text(),'All')]")
    webdriver.ActionChains(driver).move_to_element(elmt).perform()
    webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
    time.sleep(5)

def from_book(kw):
    """Warm-up path: reach Google via Google Books before the real search.

    NOTE(review): ``kw`` is ignored here as well — '幸福空間' is hard-coded.
    """
    global driver
    driver.get('https://books.google.com/')
    time.sleep(5)
    elmt = driver.find_element(By.XPATH, "//input[@id='oc-search-input']")
    elmt.send_keys('幸福空間') 
    elmt.send_keys(Keys.ENTER) #hits space
    time.sleep(7)
    elmt = driver.find_element(By.XPATH, "//div[@class='hdtb-mitem']/a[contains(text(),'全部') or contains(text(),'All')]")
    webdriver.ActionChains(driver).move_to_element(elmt).perform()
    webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
    time.sleep(5)
+
+
def from_wiki(kw):
    """Reach Google through Wikipedia's outbound link, then search for ``kw``."""
    global driver
    driver.get('https://en.wikipedia.org/wiki/Google_Search')
    time.sleep(4)
    link = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
    webdriver.ActionChains(driver).move_to_element(link).perform()
    webdriver.ActionChains(driver).move_to_element(link).click().perform()
    time.sleep(5)
    search_box = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
    time.sleep(1)
    search_box.send_keys(kw)
    search_box.send_keys(Keys.ENTER)
    time.sleep(6)
+
# The functions below all follow the same pattern: open an alternate search
# engine's results for "google", click its outbound link to google.com, then
# type ``kw`` into Google's own search box — so the Google session carries an
# external referrer.
def from_bing(kw):
    """Enter Google via a Bing result, then search for ``kw``."""
    global driver
    driver.get('https://www.bing.com/search?q=google')
    time.sleep(4)
    elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
    webdriver.ActionChains(driver).move_to_element(elmt).perform()
    webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
    time.sleep(5)
    elmt = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
    time.sleep(1)
    elmt.send_keys(kw)
    elmt.send_keys(Keys.ENTER)
    time.sleep(6)

def from_ecosia(kw):
    """Enter Google via an Ecosia result, then search for ``kw``."""
    global driver
    driver.get('https://www.ecosia.org/search?method=index&q=GOOGLE')
    time.sleep(4)
    elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
    webdriver.ActionChains(driver).move_to_element(elmt).perform()
    webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
    time.sleep(5)
    elmt = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
    time.sleep(1)
    elmt.send_keys(kw)
    elmt.send_keys(Keys.ENTER)
    time.sleep(6)

def from_brave(kw):
    """Enter Google via a Brave Search result, then search for ``kw``."""
    global driver
    driver.get('https://search.brave.com/search?q=google&source=web')
    time.sleep(4)
    elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
    webdriver.ActionChains(driver).move_to_element(elmt).perform()
    webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
    time.sleep(5)
    elmt = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
    time.sleep(1)
    elmt.send_keys(kw)
    elmt.send_keys(Keys.ENTER)
    time.sleep(6)




def from_duckduckgo(kw):
    """Enter Google via a DuckDuckGo result, then search for ``kw``."""
    global driver
    driver.get('https://duckduckgo.com/?q=google')
    time.sleep(4)
    elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
    webdriver.ActionChains(driver).move_to_element(elmt).perform()
    webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
    time.sleep(5)
#    time.sleep(9999)
    elmt = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
    time.sleep(1)
    elmt.send_keys(kw)
    elmt.send_keys(Keys.ENTER)
    time.sleep(6)


def from_ekoru(kw):
    """Enter Google via an Ekoru result, then search for ``kw``."""
    global driver
    driver.get('https://www.ekoru.org/?q=google')
    time.sleep(4)
    elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
    webdriver.ActionChains(driver).move_to_element(elmt).perform()
    webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
    time.sleep(5)
#    time.sleep(9999)
    elmt = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
    time.sleep(1)
    elmt.send_keys(kw)
    elmt.send_keys(Keys.ENTER)
    time.sleep(6)




def from_yahoo(kw):
    """Enter Google via a Yahoo result (opens a new tab), then search ``kw``."""
    global driver
    driver.get('https://search.yahoo.com/search?p=google')
    time.sleep(4)
    elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
    webdriver.ActionChains(driver).move_to_element(elmt).perform()
    webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
    time.sleep(2)
    # Yahoo opens the link in a new tab — switch focus to it.
    driver.switch_to.window(driver.window_handles[1])
    time.sleep(3)
    print(driver.current_url)
    elmt = driver.find_element(By.XPATH, "//input[@name='q']")
    time.sleep(1)
    elmt.send_keys(kw)
    elmt.send_keys(Keys.ENTER)
    time.sleep(6)

def from_gibiru(kw):
    """Enter Google via a Gibiru result (opens a new tab), then search ``kw``."""
    global driver
    driver.get('https://gibiru.com/results.html?q=google')
    time.sleep(4)
    elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
    webdriver.ActionChains(driver).move_to_element(elmt).perform()
    webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
    time.sleep(2)
    # Gibiru also opens the link in a new tab.
    driver.switch_to.window(driver.window_handles[1])
    time.sleep(3)
    print(driver.current_url)
    elmt = driver.find_element(By.XPATH, "//input[@name='q']")
    time.sleep(1)
    elmt.send_keys(kw)
    elmt.send_keys(Keys.ENTER)
    time.sleep(6)
+
+
+
def run_once(jsobj):
    """Run one rank-detection pass for {'kw': ..., 'domain': ...}.

    Searches Google for the keyword, records the 1-based rank of the first
    result matching the domain into seo.rank_detection (or -1 / '未收錄'
    when absent), clicks the matching result, then quits the driver and
    exits the process. Each invocation is a full process lifecycle.
    """
    table=db['rank_detection']
    print(jsobj)
    global driver


#    i=random.randint(0,9)
    # i=100 matches no branch below, so all referrer warm-up paths are
    # currently disabled; re-enable with the randint line above.
    i=100
    if driver is None:
        time.sleep(8)
        re_get_webdriver()
    if driver is None:
        return
    try:
        kw=jsobj['kw']
        domain=jsobj['domain']
#        kw='leo 娛樂城 484'
#        kw='leo 娛樂城 3011'

#        domain='tha484.com'
#        domain='tha3011.com'


        if i==0:
            from_book(kw)
        elif i==1:
            from_shopping(kw)
        elif i==2:
            from_wiki(kw)
        elif i==3:
            from_bing(kw)
        elif i==4:
            from_duckduckgo(kw)
        elif i==5:
            from_yahoo(kw)
        elif i==6:
            from_gibiru(kw)
        elif i==7:
            from_ekoru(kw)
        elif i==8:
            from_ecosia(kw)
        elif i==9:
            from_brave(kw)
#        time.sleep(9999)

        # Perform the actual search on Google itself (asking for 100 results).
        driver.get('https://www.google.com?num=100')
        time.sleep(3)
        print(driver.current_url)
        elmt = driver.find_element(By.XPATH, "//input[@name='q']")
        time.sleep(1)
        elmt.send_keys(kw)
        elmt.send_keys(Keys.ENTER)
        time.sleep(6)

        elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")

        numresults=len(elmts)
        print('搜尋結果數量',numresults)
        if numresults==0:
            # Zero organic results usually means we were blocked — alert and stop.
            send_msg('stop working...')
            sys.exit()

        idx=1
        found=False
        for elmt in elmts:
            href=elmt.get_attribute('href')
            txt=elmt.text
            if len(txt)>10:
                if domain in href:
                    print('found....')
                    print('clicked....')
                    print(href)
                    print(txt)
                    print("ranking", idx)
                    table.insert({'ranking':idx,'kw':kw,'results':numresults,'url':href,'title':txt})
                    found=True
                    webdriver.ActionChains(driver).move_to_element(elmt).perform()
                    webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
                    time.sleep(5)
                    break
            idx+=1
        if not found:
            # Domain absent from the first 100 results — record as unindexed.
            table.insert({'ranking':-1,'kw':kw,'results':numresults,'url':'','title':'未收錄'})

    except:
        print('exception')
        traceback.print_exc()

    driver.quit()
    sys.exit()
+
+
+
# Entry: build a random two-word query from the vocabulary below and run one
# rank-detection pass for choozmo.com. (The redis handle and the commented
# blocks are leftovers from a config-driven variant.)
r = redis.Redis(host='db.ptt.cx', port=6379, db=2,password='choozmo9')

##data=r.get('personal_seo')
#jstext=data.decode('utf-8')
#jsobj=json.loads(jstext)

#js=random.choice(jsobj)
#js=['seo','台北','新北','新竹','竹北','台灣','最強','集仕多','新聞','是什麼','搜尋','優化','如何','元宇宙','加速','排名','查詢','關鍵字','計劃','曝光','推薦','工具','google','排行','排序','公司','提升','收費','行情','網站','網頁','youtube','計畫','AI','人工智慧','deep learning','深度學習','評分','研究','價格','工具','論壇','自然','規則','流量','建議','寫作','技巧','課程','測試','因素','改善','購買','谷歌','成本','推廣','人員','方式','行銷','外貿','企業','電商','電子商務','商務','改版','分析','老師','講師','顧問','提高','影片','主播','廣告','投放','5g','元宇宙','ppt','mp4','podcast']
js=['seo','台北','新北','新竹','竹北','台灣','最強','choozmo','新聞','是什麼','搜尋','優化','如何','元宇宙','加速','排名','查詢','關鍵字','計劃','曝光','推薦','工具','google','排行','排序','公司','提升','收費','行情','網站','網頁','youtube','計畫','AI','人工智慧','deep learning','深度學習','評分','研究','價格','工具','論壇','自然','規則','流量','建議','寫作','技巧','課程','測試','因素','改善','購買','谷歌','成本','推廣','人員','方式','行銷','外貿','企業','電商','電子商務','商務','改版','分析','老師','講師','顧問','提高','影片','主播','廣告','投放','5g','元宇宙','ppt','mp4','podcast','pptx']

# Two independently drawn terms (may coincide) form the query.
elmt1=random.choice(js)
elmt2=random.choice(js)

#run_once({'domain':'choozmo.com','kw':elmt1+" "+elmt2+" choozmo"})
#run_once({'domain':'choozmo.com','kw':elmt1+" "+elmt2+" 集仕多"})
run_once({'domain':'choozmo.com','kw':elmt1+" "+elmt2})

#run_once({'domain':'choozmo.com','kw':elmt1+"  集仕多"})
#run_once({'domain':'choozmo.com','kw':"企業 研發 委外"})
#run_once({'domain':'choozmo.com','kw':"企業 系統 研發"})
#run_once({'domain':'choozmo.com','kw':"企業 研發 方案 委外"})
#run_once({'domain':'choozmo.com','kw':"集仕多 委外"})

+ 72 - 0
choozmo/gmailtest.py

@@ -0,0 +1,72 @@
+# Copyright 2018 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# [START gmail_quickstart]
+from __future__ import print_function
+
+import os.path
+
+from google.auth.transport.requests import Request
+from google.oauth2.credentials import Credentials
+from google_auth_oauthlib.flow import InstalledAppFlow
+from googleapiclient.discovery import build
+from googleapiclient.errors import HttpError
+
+# If modifying these scopes, delete the file token.json.
+SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']
+
+
def main():
    """Shows basic usage of the Gmail API.
    Lists the user's Gmail labels.
    """
    creds = None
    # The file token.json stores the user's access and refresh tokens, and is
    # created automatically when the authorization flow completes for the first
    # time.
    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            # Interactive OAuth flow using the locally stored client secret.
            flow = InstalledAppFlow.from_client_secrets_file(
                'c:\\keys\\client_secret_392946835471.json', SCOPES)
            creds = flow.run_local_server(port=0)
        # Save the credentials for the next run
        with open('token.json', 'w') as token:
            token.write(creds.to_json())

    try:
        # Call the Gmail API
        service = build('gmail', 'v1', credentials=creds)
        results = service.users().labels().list(userId='me').execute()
        labels = results.get('labels', [])

        if not labels:
            print('No labels found.')
            return
        print('Labels:')
        for label in labels:
            print(label['name'])

    except HttpError as error:
        # TODO(developer) - Handle errors from gmail API.
        print(f'An error occurred: {error}')


if __name__ == '__main__':
    main()
# [END gmail_quickstart]

+ 20 - 13
choozmo/googlenews.py

@@ -4,21 +4,26 @@ from GoogleNews import GoogleNews
 
 def news(title_and_link):
     for i in title_and_link:
-        article = Article(i['link'])
-        article.download()
-        article.parse()
-        text = article.text
-        title = i['title']
         try:
-            with open(f'/googlenews/{title}.txt', 'x') as f:
-                f.write(text)
-        except FileExistsError:
-            print('已有同樣新聞文章')
+            article = Article(i['link'])
+            article.download()
+            article.parse()
+            text = article.text
+            title = i['title']
+            try:
+                print(title)
+                with open(f'/Users/zooeytsai/ig_tags/googlenews/{title}.txt','x') as f:
+                    f.write(text)
+            except FileExistsError:
+                print('已有同樣新聞文章','title')
+                pass
+        except:
+            print('403')
             pass
     return text
 
 
-def google_news(keyword, page):
+def google_news(keyword,page):
     g = GoogleNews()
     g.setlang('cn')
     g.setencode('utf-8')
@@ -29,7 +34,7 @@ def google_news(keyword, page):
         g.get_page(page)
         result = g.result()
         for j in result:
-            d = {'title': j['title'], 'link': j['link']}
+            d ={'title':j['title'],'link':j['link']}
             news_link.append(d)
     g.clear()
     print(len(news_link))
@@ -37,5 +42,7 @@ def google_news(keyword, page):
     return news_link
 
 
-result = google_news('地震', 1)
-news(result)
+k = ['寵物蛋糕','寵物住宿','寵物餐廳','寵物用品','寵物美容','寵物友善']
+for i in k:
+    result = google_news(i,5)
+    news(result)

+ 110 - 0
choozmo/gsc_tree.py

@@ -0,0 +1,110 @@
+import traceback
+import dataset
+import codecs
+import sys
+import pickle
+import os
+import searchconsole
+import pandas as pd
+import networkx as nx
+#import pysftp
+import codecs
+import pyvis
+import sys
+import pickle
+import os
+import searchconsole
+from pyvis.network import Network
+import jieba
+
+
+#db = dataset.connect('mysql://choozmo:pAssw0rd@127.0.0.1:3306/hhh?charset=utf8mb4')
+#db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/hhh?charset=utf8mb4')
+
+#db.begin()
+db = dataset.connect('sqlite:///:memory:')
+table=db['tmp']
+#table=db['gsc_page_query_year']
+#pname='korea'
+rid=0
+
+def checkig():
+    global instl
+    global table
+    global pname
+    global rid
+    lst=[]
+    cntdict={}
+    codelist={}
+    idx=0
+    flag_break=False
+
+    fname=os.path.abspath(__file__)
+    elmts=fname.split(os.path.sep)
+    path2=os.path.sep.join(elmts[0:-1])
+    keysdir=path2+os.path.sep+'../keys'+os.path.sep
+
+    account = searchconsole.authenticate(client_config='c:/keys/client_secret.json',credentials='c:/keys/credentials.json')
+#    account = searchconsole.authenticate(client_config='C:\\gitlab\\kw_tools\\kw_tools\\hhh\\keys\\client_secret.json',credentials='C:\\gitlab\\kw_tools\\kw_tools\\hhh\\keys\\credentials.json')
+    G = nx.Graph()
+
+#    webproperty = account['https://ipromise.com.tw/']
+#    webproperty = account['sc-domain:face8ook.org']
+#    webproperty = account['sc-domain:hhh.com.tw']
+#   webproperty = account['sc-domain:hhh.com.tw']
+
+#    webproperty = account['https://www.damanwoo.com/']
+    webproperty = account['https://innews.com.tw/']
+
+#    report=webproperty.query.range('2021-03-01', '2021-06-17').dimension('page','query').get()
+#    report=webproperty.query.range('2021-06-01', '2021-06-17').dimension('page','query').get()
+#    report=webproperty.query.range('2020-06-01', '2021-06-22').dimension('page','query').filter('page', '/designers/cases/(491|31|293|278|31|24|594|356|307|491|33|385)', 'equals').get()
+#    report=webproperty.query.range('2020-03-01', '2021-06-22').dimension('page','query').filter('page', '/designers/cases/'+pgnum, 'contains').get()
+#    report=webproperty.query.range('2020-03-01', '2021-06-22').dimension('page','query').filter('page', '/designers/cases/'+pgnum, 'contains').get()
+    report=webproperty.query.range('2022-01-01', '2022-04-16').dimension('page','query').get()
+
+    result=[]
+    rdict={}
+    total_idx=0
+
+    for r in report.rows:
+        if 'hhh.com.tw/designers/cases/' not in r[0]:
+            continue
+        if rdict.get(r[0]) is None:
+            total_idx+=1
+            rid=total_idx
+            rdict[r[0]]=rid
+        else:
+            rid=rdict[r[0]]
+        entry={'page':r[0],'query':r[1],'rid':rid}
+        result.append(entry)
+
+    print('list done')
+
+    for r in result:
+        table.insert(r)
+    db.commit()
+    print('db done')
+
+#    cursor=db.query('select query as q,page as url,rid from tmp where rid in (select rid from (select rid,count(*) from tmp group by rid having count(*) > 2 and count(*) < 6) as tbl1) order by rid ')
+    cursor=db.query('select query as q,page as url,rid from tmp order by rid ')
+
+    riddict={}
+    prev=''
+    curnode=''
+    cururl=''
+
+    total_idx=0
+    for c in cursor:
+        G.add_edge(c['q'],c['rid'],weight=3,width=3,borderwidth=3)
+    remove=[]
+    G.remove_edges_from(nx.selfloop_edges(G))
+    G2=G
+    pyG = Network(height="600px", width="100%",bgcolor="#444444",font_color="white")
+
+    pyG.from_nx(G2)
+    pyG.show('news.html')
+
+
+r=checkig()
+

+ 110 - 0
choozmo/gsc_tree2.py

@@ -0,0 +1,110 @@
+import searchconsole
+import dataset
+import os
+import networkx as nx
+from pyvis.network import Network
+import sys
+
+db = dataset.connect('sqlite:///:memory:')
+table=db['tmp']
+rid=0
+
+def checkig():
+    global instl
+    global table
+    global pname
+    global rid
+    lst=[]
+    cntdict={}
+    codelist={}
+    idx=0
+    flag_break=False
+
+    fname=os.path.abspath(__file__)
+    elmts=fname.split(os.path.sep)
+    path2=os.path.sep.join(elmts[0:-1])
+    keysdir=path2+os.path.sep+'../keys'+os.path.sep
+
+#    account = searchconsole.authenticate(client_config='/Users/zooeytsai/Downloads/client_secret4.json')
+
+#    account = searchconsole.authenticate(client_config='c:/keys/client_secret4.json',serialize='c:/keys/credentials20220524.json')
+    account = searchconsole.authenticate(client_config='c:/keys/client_secret4.json',credentials='c:/keys/credentials20220524.json')
+
+#    account = searchconsole.authenticate(client_config='c:/keys/client_secret.json',credentials='c:/keys/credentials.json')
+
+    import pprint
+    import codecs
+#    pprint.pprint(vars(account.service.sites))
+#    pprint.pprint(vars(account.service))
+    site_list = account.service.sites().list().execute()
+    print(site_list)
+#    print(account.service.sites().list())
+#    fw=codecs.open('c:/tmp/pprint.txt','w','utf-8')
+#    fw.write(pprint.pformat(vars(account)))
+#    fw.close()
+#    sys.exit()
+#  credentials='credentials.json'
+#    print(account.)
+
+#    webproperty = account['https://bennis.com.tw/']
+    webproperty = account['sc-domain:bennis.com.tw']
+
+
+    print(webproperty)
+#    report = webproperty.query.range('today', days=-7).dimension('query').get()
+    report=webproperty.query.range('today', days=-4).dimension('page','query').get()
+
+
+    print(report)
+
+    # report = webproperty.query.range('2022-05-20', '2022-05-23').dimension('page', 'query').get()
+#    account = searchconsole.authenticate(client_config='C:\\gitlab\\kw_tools\\kw_tools\\hhh\\keys\\client_secret.json',credentials='C:\\gitlab\\kw_tools\\kw_tools\\hhh\\keys\\credentials.json')
+    G = nx.Graph()
+
+#    webproperty = account['https://ipromise.com.tw/']
+#    webproperty = account['sc-domain:face8ook.org']
+#     webproperty = account['sc-domain:hhh.com.tw']
+
+
+    result=[]
+    rdict={}
+    total_idx=0
+
+    for r in report.rows:
+        if rdict.get(r[0]) is None:
+            total_idx+=1
+            rid=total_idx
+            rdict[r[0]]=rid
+        else:
+            rid=rdict[r[0]]
+        entry={'page':r[0],'query':r[1],'rid':rid}
+        result.append(entry)
+
+    print('list done')
+    print(result)
+    for r in result:
+        table.insert(r)
+    db.commit()
+    print('db done')
+
+#    cursor=db.query('select query as q,page as url,rid from tmp where rid in (select rid from (select rid,count(*) from tmp group by rid having count(*) > 2 and count(*) < 6) as tbl1) order by rid ')
+    cursor=db.query('select query as q,page as url,rid from tmp order by rid ')
+
+    riddict={}
+    prev=''
+    curnode=''
+    cururl=''
+
+    total_idx=0
+    for c in cursor:
+        G.add_edge(c['q'],c['rid'],weight=3,width=3,borderwidth=3)
+    remove=[]
+    G.remove_edges_from(nx.selfloop_edges(G))
+    G2=G
+    pyG = Network(height="600px", width="100%",bgcolor="#444444",font_color="white")
+
+    pyG.from_nx(G2)
+    pyG.show('news.html')
+
+
+r=checkig()

+ 112 - 0
choozmo/gsc_tree3.py

@@ -0,0 +1,112 @@
+import searchconsole
+import dataset
+import os
+import networkx as nx
+from pyvis.network import Network
+import sys
+
+db = dataset.connect('sqlite:///:memory:')
+table=db['tmp']
+rid=0
+
+def checkig():
+    global instl
+    global table
+    global pname
+    global rid
+    lst=[]
+    cntdict={}
+    codelist={}
+    idx=0
+    flag_break=False
+
+    fname=os.path.abspath(__file__)
+    elmts=fname.split(os.path.sep)
+    path2=os.path.sep.join(elmts[0:-1])
+    keysdir=path2+os.path.sep+'../keys'+os.path.sep
+
+#    account = searchconsole.authenticate(client_config='/Users/zooeytsai/Downloads/client_secret4.json')
+
+#    account = searchconsole.authenticate(client_config='c:/keys/client_secret4.json',serialize='c:/keys/credentials20220524.json')
+    account = searchconsole.authenticate(client_config='c:/keys/client_secret4.json',credentials='c:/keys/credentials20220524.json')
+
+#    account = searchconsole.authenticate(client_config='c:/keys/client_secret.json',credentials='c:/keys/credentials.json')
+
+    import pprint
+    import codecs
+#    pprint.pprint(vars(account.service.sites))
+#    pprint.pprint(vars(account.service))
+    site_list = account.service.sites().list().execute()
+    print(site_list)
+#    print(account.service.sites().list())
+#    fw=codecs.open('c:/tmp/pprint.txt','w','utf-8')
+#    fw.write(pprint.pformat(vars(account)))
+#    fw.close()
+#    sys.exit()
+#  credentials='credentials.json'
+#    print(account.)
+
+#    webproperty = account['https://bennis.com.tw/']
+    webproperty = account['sc-domain:bennis.com.tw']
+
+
+    print(webproperty)
+#    report = webproperty.query.range('today', days=-7).dimension('query').get()
+    report=webproperty.query.range('today', days=-4).dimension('page','query').get()
+
+
+    print(report)
+
+    # report = webproperty.query.range('2022-05-20', '2022-05-23').dimension('page', 'query').get()
+#    account = searchconsole.authenticate(client_config='C:\\gitlab\\kw_tools\\kw_tools\\hhh\\keys\\client_secret.json',credentials='C:\\gitlab\\kw_tools\\kw_tools\\hhh\\keys\\credentials.json')
+    G = nx.Graph()
+
+#    webproperty = account['https://ipromise.com.tw/']
+#    webproperty = account['sc-domain:face8ook.org']
+#     webproperty = account['sc-domain:hhh.com.tw']
+
+
+    result=[]
+    rdict={}
+    total_idx=0
+
+    for r in report.rows:
+        if rdict.get(r[0]) is None:
+            total_idx+=1
+            rid=total_idx
+            rdict[r[0]]=rid
+        else:
+            rid=rdict[r[0]]
+        entry={'page':r[0],'query':r[1],'rid':rid}
+        result.append(entry)
+
+    print('list done')
+    print(result)
+    for r in result:
+        table.insert(r)
+    db.commit()
+    print('db done')
+
+#    cursor=db.query('select query as q,page as url,rid from tmp where rid in (select rid from (select rid,count(*) from tmp group by rid having count(*) > 2 and count(*) < 6) as tbl1) order by rid ')
+    cursor=db.query('select query as q,page as url,rid from tmp order by rid ')
+
+    riddict={}
+    prev=''
+    curnode=''
+    cururl=''
+
+    total_idx=0
+    for c in cursor:
+        G.add_edge(c['q'],c['rid'],borderwidth=3)
+#        G.add_edge(c['q'],c['rid'],weight=3,width=3,borderwidth=3)
+
+    remove=[]
+    G.remove_edges_from(nx.selfloop_edges(G))
+    G2=G
+    pyG = Network(height="600px", width="100%",bgcolor="#444444",font_color="white")
+
+    pyG.from_nx(G2)
+    pyG.show('news.html')
+
+
+r=checkig()

+ 5 - 4
choozmo/gsearch_general.py

@@ -13,14 +13,14 @@ import random
 from bs4 import BeautifulSoup
 import requests
 import time
-import rpyc
+# import rpyc
 import sys
 import docker
-import  googlesearch
+# import googlesearch
 import codecs
 import sys
 import time
-import dataset
+# import dataset
 import os
 
 def process_one(driver):
@@ -50,6 +50,7 @@ def process_query(driver,qs,number_results=10,language_code='en',enable_next=Tru
         totallst+=lst
         try:
             if enable_next:
+                time.sleep(3)
                 elmt=driver.find_element_by_xpath("//a[@id='pnnext']")
                 webdriver.ActionChains(driver).move_to_element(elmt).perform()
                 webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
@@ -72,7 +73,7 @@ def restart_browser():
 
     options = webdriver.ChromeOptions()
 #    options.add_argument("--proxy-server=http://80.48.119.28:8080")
-    driver=webdriver.Chrome(options=options)
+    driver=webdriver.Chrome(executable_path='/Users/zooeytsai/Downloads/chromedriver',options=options)
 
 #    driver=webdriver.Chrome(desired_capabilities=options.to_capabilities())
     #driver = webdriver.Remote(

+ 3 - 0
choozmo/gsearch_libtest.py

@@ -0,0 +1,3 @@
+from googlesearch import search
+for url in search('"Breaking Code" WordPress blog', stop=20):
+    print(url)

+ 11 - 4
choozmo/igool/igtree.py

@@ -50,8 +50,15 @@ import pickle
 #kw='馬卡龍'
 #kw='馬林糖'
 #kw='檸檬塔'
-kw='泡芙'
-
+#kw='泡芙'
+kw='mean snapchat'
+#kw='留學'
+#kw='勞力士'
+#kw='白蟻'
+#kw='影片製作'
+#kw='ai 合成'
+
+#kw='菲律賓'
 #kw='生巧克力'
 #kw='牛奶巧克力'
 
@@ -59,8 +66,8 @@ kw='泡芙'
 
 #s={'suggests':[]}
 s = suggests.suggests.get_suggests(kw, source='google')
-#G = nx.Graph()
-G = pickle.load( open( "gs2.p", "rb" ) )
+G = nx.Graph()
+#G = pickle.load( open( "gs2.p", "rb" ) )
 
 
 #G.remove_node('巧克力囊腫')

+ 16 - 0
choozmo/imp_selected.py

@@ -0,0 +1,16 @@
+import dataset
+import codecs
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+
+
+table=db['selected_kw']
+
+fr=codecs.open('C:\\tmp\\selected.csv','r','utf-8')
+lines=fr.readlines()
+lst=[]
+for l in lines:
+    term=l.replace('\n','')
+    term=term.replace('\r','')
+    table.insert({'term':term,'client':'清原'})
+
+

+ 34 - 0
choozmo/imp_ytlist.py

@@ -0,0 +1,34 @@
+import json
+import os
+import csv
+import dataset
+import datetime
+import codecs
+import sys
+
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+table=db['sbir_yt']
+
+fr=codecs.open('c:/tmp/sbir_yt.csv','r','utf-8')
+lines=fr.readlines()
+fr.close()
+for l in lines:
+    tmp=l.replace('https://youtu.be/','').strip()
+    table.insert({'url':tmp})
+
+sys.exit()
+
+
+table=db['customer_list']
+print (os.getcwd())
+print(os.path.realpath('.'))
+
+
+cursor=db.query('select * from customer_list')
+for c in cursor:
+    print(c['name'])
+    print(c['updated'])
+
+#jstr=json.dumps(qlist)
+#print(jstr)
+

+ 91 - 0
choozmo/linkedin_detail.py

@@ -0,0 +1,91 @@
+import traceback
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+import os
+import datetime
+import urllib.parse
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.common.keys import Keys
+import codecs
+import random
+from bs4 import BeautifulSoup
+import requests
+import time
+import rpyc
+import sys
+import docker
+import  googlesearch
+import codecs
+import sys
+import time
+import dataset
+import os
+
+
+def scrolling(driver,pgnum):
+    ub = driver.find_element_by_css_selector('body')
+    for i in range(pgnum):
+        ub.send_keys(Keys.PAGE_DOWN)
+        if pgnum>1:
+            time.sleep(0.3)
+
+
+def process_query(driver,c):
+    global db
+    url=c['href']
+    driver.get('https://www.linkedin.com/in/'+url+'/overlay/contact-info/')
+    time.sleep(3)
+    email=''
+    phone=''
+    try:
+        e_email=driver.find_element_by_xpath(".//a[contains(@href,'mailto:') ]")
+        print(e_email.text)
+        email=e_email.text
+        print(e_email.get_attribute('href'))
+    except:
+        print('no email')
+    try:
+        e_phone=driver.find_element_by_xpath("//section[@class='pv-contact-info__contact-type ci-phone' ]//li[contains(@class,'pv-contact-info__ci-container')]")
+        print(e_phone.text)
+        phone=e_phone.text
+    except:
+        print('no phone')
+
+    return {'email':email,'phone':phone}
+
+
+def restart_browser():
+#    os.system('docker container restart p4444')
+#    time.sleep(10)
+
+    options = webdriver.ChromeOptions()
+#    options.add_argument("--proxy-server=socks5://130.61.93.198:1080")
+    options.add_argument("start-maximized")
+    options.add_argument('user-data-dir=C:\\Users\\jared\\AppData\\Local\\Google\\Chrome\\User Data')
+    options.add_argument('--profile-directory=Default')
+
+    driver=webdriver.Chrome(desired_capabilities=options.to_capabilities())
+    #driver = webdriver.Remote(
+    #    command_executor='http://127.0.0.1:4444/wd/hub',
+    #desired_capabilities=options.to_capabilities())
+#    desired_capabilities=DesiredCapabilities.CHROME)
+    driver.set_window_size(1400,1000)
+    return driver
+
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+lst=[]
+cursor=db.query('select id,href from linkedin_list where email is null order by rand()')
+for c in cursor:
+    lst.append(c)
+
+driver=restart_browser()
+for c in lst:
+    data=process_query(driver,c)    
+    db.query('update linkedin_list set email="'+data['email']+'", phone="'+data['phone']+'" where id="'+str(c['id'])+'"')
+    print(data)
+    time.sleep(2)
+
+time.sleep(9999)

+ 19 - 0
choozmo/mail_list_imp_csv.py

@@ -0,0 +1,19 @@
+import time
+from cv2 import line
+import dataset
+import os
+import codecs
+
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+table=db['mail_list']
+
+fr=codecs.open(r'C:\Users\jared\Downloads\meettaipei.csv','r','utf-8')
+lines=fr.readlines()
+fr.close()
+
+for l in lines:
+    l=l.replace('\n','').replace('\r','')
+    elmts=l.split(',')
+    body={'title':elmts[0],'email':elmts[1],'enabled':1}
+    print(body)
+    table.insert(body)

+ 112 - 0
choozmo/meta_1777.py

@@ -0,0 +1,112 @@
+#import redis
+import time
+import traceback
+#import json
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+import os
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+import dataset
+
+import json
+import random
+import time
+import sys
+import codecs
+import random
+import os
+import time
+from userAgentRandomizer import userAgents
+
+driver=None
+
+def re_get_webdriver():
+    global driver
+    result=[]
+    if driver is not None:
+        print('closing....')
+        driver.quit()
+        os.system('killall chrome')
+        print('quit....')
+        driver=None
+    try:
+        ua = userAgents()
+
+        user_agent = ua.random()        
+
+        options = webdriver.ChromeOptions()
+
+
+        mobile_emulation = {
+            "deviceMetrics": { "width": 360, "height": 640, "pixelRatio": 3.0 },
+            "userAgent": "Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 5 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19" }
+#        options.add_experimental_option("mobileEmulation", mobile_emulation)
+
+        options.add_argument("--no-sandbox")
+        options.add_argument("--disable-dev-shm-usage")
+        options.add_argument("--headless")
+
+#        print(user_agent)
+#        options.add_argument("--user-agent=" +user_agent)
+        options.add_argument("--incognito")
+        driver=None
+        try:
+            driver = webdriver.Chrome(options=options)
+        except:
+#            driver.quit()
+#            os.system('pkill -f ')
+            os.system('kill %d' % os.getpid())
+            sys.exit()
+            return
+        driver.set_window_size(1400,1000)
+        return
+    except:
+        import traceback
+        traceback.print_exc()
+        driver=None
+        return None
+
+def run_once(url):
+    global driver
+    i=random.randint(0,7)
+    if i==0 or driver is None:
+        time.sleep(8)
+        re_get_webdriver()
+    if driver is None:
+        return
+    try:
+        driver.get(url)
+        time.sleep(8)
+        
+        elmt = driver.find_element(By.XPATH, "//a[contains(@href,'HxXfN9r')]")
+        webdriver.ActionChains(driver).move_to_element(elmt).perform()
+        webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+        time.sleep(10)
+        driver.quit()
+        print("DONE!!!!")
+#        driver.execute_script('window.open("'+url+'","_blank");')
+#        driver.execute_script("window.scrollTo(0, window.scrollY + 400)")
+#        time.sleep(0.5)
+    except:
+        traceback.print_exc()
+        print('exception')
+#        time.sleep(9999)
+
+
+lst=[]
+
+
+
+#lst=['https://www.hhh.com.tw/columns/detail/3427/index.php']
+    #for i in range(20):
+while True:
+    l='https://cmm.ai/meta'
+    print(l)
+    try:
+        run_once(l)
+        time.sleep(2)
+    except:
+        traceback.print_exc()

+ 12 - 0
choozmo/phantomtest.py

@@ -0,0 +1,12 @@
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+
+driver = webdriver.Remote(
+    command_executor='http://127.0.0.1:8910',
+    desired_capabilities=DesiredCapabilities.PHANTOMJS)
+
+driver.get('http://www.yahoo.com')
+print(driver.page_source)
+#driver.find_element_by_css_selector('a[title="hello"]').click()
+
+driver.quit()

+ 319 - 0
choozmo/priv_seo.py

@@ -0,0 +1,319 @@
+#import redis
+import time
+import traceback
+#import json
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+import os
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+import dataset
+from selenium.webdriver.common.keys import Keys
+import json
+import random
+import time
+import redis
+import sys
+import codecs
+import random
+import os
+import time
+from userAgentRandomizer import userAgents
+
+driver=None
+
+def re_get_webdriver():
+    global driver
+    result=[]
+    if driver is not None:
+        print('closing....')
+        driver.quit()
+        os.system('killall chrome')
+        print('quit....')
+        driver=None
+    try:
+        ua = userAgents()
+
+        user_agent = ua.random()        
+
+        options = webdriver.ChromeOptions()
+        options.add_argument("--no-sandbox")
+        options.add_argument("--disable-dev-shm-usage")
+#        options.add_argument("--headless")
+
+        print(user_agent)
+#        options.add_argument("--user-agent=" +user_agent)
+        options.add_argument("--incognito")
+        driver=None
+        try:
+            driver = webdriver.Chrome(options=options)
+        except:
+#            driver.quit()
+#            os.system('pkill -f ')
+            os.system('kill %d' % os.getpid())
+            sys.exit()
+            return
+#        driver.set_window_size(1400,1000)
+        driver.minimize_window()
+        return
+    except:
+        import traceback
+        traceback.print_exc()
+        driver=None
+        return None
+
+
+
+def from_shopping(kw):
+    global driver
+    driver.get('https://shopping.google.com')
+    time.sleep(5)
+    elmt = driver.find_element(By.XPATH, "//input[@id='REsRA']")
+    elmt.send_keys(kw) 
+    elmt.send_keys(Keys.ENTER) #hits space
+    time.sleep(7)
+    elmt = driver.find_element(By.XPATH, "//div[@class='hdtb-mitem']/a[contains(text(),'全部') or contains(text(),'All')]")
+    webdriver.ActionChains(driver).move_to_element(elmt).perform()
+    webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+    time.sleep(5)
+
+def from_book(kw):
+    global driver
+    driver.get('https://books.google.com/')
+    time.sleep(5)
+    elmt = driver.find_element(By.XPATH, "//input[@id='oc-search-input']")
+    elmt.send_keys(kw) 
+    elmt.send_keys(Keys.ENTER) #hits space
+    time.sleep(7)
+    elmt = driver.find_element(By.XPATH, "//div[@class='hdtb-mitem']/a[contains(text(),'全部') or contains(text(),'All')]")
+    webdriver.ActionChains(driver).move_to_element(elmt).perform()
+    webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+    time.sleep(5)
+
+
+def from_wiki(kw):
+    global driver
+    driver.get('https://en.wikipedia.org/wiki/Google_Search')
+    time.sleep(4)
+    elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
+    webdriver.ActionChains(driver).move_to_element(elmt).perform()
+    webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+    time.sleep(5)
+    elmt = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
+    time.sleep(1)
+    elmt.send_keys(kw)
+    elmt.send_keys(Keys.ENTER)
+    time.sleep(6)
+
+def from_bing(kw):
+    global driver
+    driver.get('https://www.bing.com/search?q=google')
+    time.sleep(4)
+    elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
+    webdriver.ActionChains(driver).move_to_element(elmt).perform()
+    webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+    time.sleep(5)
+    elmt = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
+    time.sleep(1)
+    elmt.send_keys(kw)
+    elmt.send_keys(Keys.ENTER)
+    time.sleep(6)
+
+def from_ecosia(kw):
+    global driver
+    driver.get('https://www.ecosia.org/search?method=index&q=GOOGLE')
+    time.sleep(4)
+    elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
+    webdriver.ActionChains(driver).move_to_element(elmt).perform()
+    webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+    time.sleep(5)
+    elmt = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
+    time.sleep(1)
+    elmt.send_keys(kw)
+    elmt.send_keys(Keys.ENTER)
+    time.sleep(6)
+
+def from_brave(kw):
+    global driver
+    driver.get('https://search.brave.com/search?q=google&source=web')
+    time.sleep(4)
+    elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
+    webdriver.ActionChains(driver).move_to_element(elmt).perform()
+    webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+    time.sleep(5)
+    elmt = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
+    time.sleep(1)
+    elmt.send_keys(kw)
+    elmt.send_keys(Keys.ENTER)
+    time.sleep(6)
+
+
+
+
+
+def from_duckduckgo(kw):
+    global driver
+    driver.get('https://duckduckgo.com/?q=google')
+    time.sleep(4)
+    elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
+    webdriver.ActionChains(driver).move_to_element(elmt).perform()
+    webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+    time.sleep(5)
+#    time.sleep(9999)
+    elmt = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
+    time.sleep(1)
+    elmt.send_keys(kw)
+    elmt.send_keys(Keys.ENTER)
+    time.sleep(6)
+
+
+def from_ekoru(kw):
+    global driver
+    driver.get('https://www.ekoru.org/?q=google')
+    time.sleep(4)
+    elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
+    webdriver.ActionChains(driver).move_to_element(elmt).perform()
+    webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+    time.sleep(5)
+#    time.sleep(9999)
+    elmt = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
+    time.sleep(1)
+    elmt.send_keys(kw)
+    elmt.send_keys(Keys.ENTER)
+    time.sleep(6)
+
+
+
+
+
+
+def from_yahoo(kw):
+    global driver
+    driver.get('https://search.yahoo.com/search?p=google')
+    time.sleep(4)
+    elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
+    webdriver.ActionChains(driver).move_to_element(elmt).perform()
+    webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+    time.sleep(2)
+    driver.switch_to.window(driver.window_handles[1])
+    time.sleep(3)
+    print(driver.current_url)
+    elmt = driver.find_element(By.XPATH, "//input[@name='q']")
+    time.sleep(1)
+    elmt.send_keys(kw)
+    elmt.send_keys(Keys.ENTER)
+    time.sleep(6)
+
+def from_gibiru(kw):
+    global driver
+    driver.get('https://gibiru.com/results.html?q=google')
+    time.sleep(4)
+    elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
+    webdriver.ActionChains(driver).move_to_element(elmt).perform()
+    webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+    time.sleep(2)
+    driver.switch_to.window(driver.window_handles[1])
+    time.sleep(3)
+    print(driver.current_url)
+    elmt = driver.find_element(By.XPATH, "//input[@name='q']")
+    time.sleep(1)
+    elmt.send_keys(kw)
+    elmt.send_keys(Keys.ENTER)
+    time.sleep(6)
+
+
+def from_google(kw):
+    global driver
+    driver.get('https://www.google.com?num=30')
+    print(kw)
+    time.sleep(4)
+    elmt = driver.find_element(By.XPATH, "//input[@name='q']")
+    time.sleep(1)
+    elmt.send_keys(kw)
+    elmt.send_keys(Keys.ENTER)
+    time.sleep(6)
+
+def run_once(jsobj):
+    global driver
+
+
+    i=random.randint(0,9)
+    i=10
+    if driver is None:
+        time.sleep(8)
+        re_get_webdriver()
+    if driver is None:
+        return
+    try:
+        kw=jsobj['kw']
+        domain=jsobj['domain']
+#        kw='leo 娛樂城 484'
+#        kw='leo 娛樂城 3011'
+
+#        domain='tha484.com'
+#        domain='tha3011.com'
+
+
+        if i==0:
+            from_book(kw)
+        elif i==1:
+            from_shopping(kw)
+        elif i==2:
+            from_wiki(kw)
+        elif i==3:
+            from_bing(kw)
+        elif i==4:
+            from_duckduckgo(kw)
+        elif i==5:
+            from_yahoo(kw)
+        elif i==6:
+            from_gibiru(kw)
+        elif i==7:
+            from_ekoru(kw)
+        elif i==8:
+            from_ecosia(kw)
+        elif i==9:
+            from_brave(kw)
+        else:
+            from_google(kw)
+#        time.sleep(9999)
+        elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
+
+        print('搜尋結果數量',len(elmts))
+        idx=1
+        for elmt in elmts:
+            href=elmt.get_attribute('href')
+            txt=elmt.text
+            if len(txt)>10:
+                if domain in href:
+                    print('found....')
+                    print('clicked....')
+                    print(href)
+                    print(txt)
+                    print("ranking", idx)
+                    webdriver.ActionChains(driver).move_to_element(elmt).perform()
+                    webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+                    time.sleep(5)
+                    break
+            idx+=1
+
+    except:
+        print('exception')
+        traceback.print_exc()
+
+    driver.quit()
+    sys.exit()
+
+
+
+r = redis.Redis(host='db.ptt.cx', port=6379, db=2,password='choozmo9')
+
+data=r.get('personal_seo')
+jstext=data.decode('utf-8')
+jsobj=json.loads(jstext)
+
+js=random.choice(jsobj)
+run_once(js)
+

+ 7 - 0
choozmo/privseo.bat

@@ -0,0 +1,7 @@
+@echo off
+:while
+(
+   python priv_seo.py
+   goto :while
+)
+

+ 87 - 0
choozmo/profile_selenium.py

@@ -0,0 +1,87 @@
+import traceback
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+import os
+import datetime
+import urllib.parse
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+import codecs
+import random
+from bs4 import BeautifulSoup
+import requests
+import time
+import rpyc
+import sys
+import docker
+import  googlesearch
+import codecs
+import sys
+import time
+import dataset
+import os
+
+def process_one(driver):
+    lst=[]
+    elmts=driver.find_elements_by_xpath("//div[@class='yuRUbf']/a")
+    for elmt in elmts:
+        try:
+            href=elmt.get_attribute('href')
+#            print(href)
+            txt=elmt.text.split('\n')
+            print(txt[0])
+            lst.append({'title':txt[0],'url':href})
+        except:
+            print('href2 exception')
+            traceback.print_exc()
+    return lst
+
+def process_query(driver,qs,number_results=10,language_code='en'):
+    escaped_search_term=urllib.parse.quote(qs)
+    googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(escaped_search_term, number_results+1,language_code)
+    print(googleurl)
+    driver.get(googleurl)
+    time.sleep(3)
+    totallst=[]
+    while True:
+        lst=process_one(driver)
+        totallst+=lst
+        try:
+            elmt=driver.find_element_by_xpath("//a[@id='pnnext']")
+            webdriver.ActionChains(driver).move_to_element(elmt).perform()
+            webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+        except:
+            traceback.print_exc()
+            print('pnnext exception')
+            break
+        time.sleep(1.5)
+    return totallst
+
+
+result=[]
+driver=None
+
+def restart_browser():
+#    os.system('docker container restart p4444')
+#    time.sleep(10)
+
+    options = webdriver.ChromeOptions()
+    options.add_argument("start-maximized")
+    options.add_argument('user-data-dir=C:\\Users\\jared\\AppData\\Local\\Google\\Chrome\\User Data')
+    options.add_argument('--profile-directory=Profile 77')
+
+    driver=webdriver.Chrome(chrome_options=options)
+
+    #driver = webdriver.Remote(
+    #    command_executor='http://127.0.0.1:4444/wd/hub',
+    #desired_capabilities=options.to_capabilities())
+#    desired_capabilities=DesiredCapabilities.CHROME)
+    driver.set_window_size(1400,1000)
+    return driver
+
+
+driver=restart_browser()
+driver.get('http://facebook.com')
+time.sleep(9999)

+ 42 - 0
choozmo/proxytest.py

@@ -0,0 +1,42 @@
+import urllib.request
+import socket
+import urllib.error
+import sys
+from Proxy_List_Scrapper import Scrapper, Proxy, ScrapperException
+
+def is_bad_proxy(pip):    
+    """Probe HTTP proxy `pip` ('ip:port') by fetching google.com through it.
+
+    Returns False when the fetch succeeds; on HTTPError returns the status
+    code (non-zero, hence truthy); any other failure returns True.  Callers
+    only test truthiness.  Side effect: install_opener() replaces the
+    process-wide default urllib opener.
+    """
+    try:
+        proxy_handler = urllib.request.ProxyHandler({'http': pip})
+        opener = urllib.request.build_opener(proxy_handler)
+        opener.addheaders = [('User-agent', 'Mozilla/5.0')]
+        urllib.request.install_opener(opener)
+        req=urllib.request.Request('http://www.google.com')  # change the URL to test here
+        sock=urllib.request.urlopen(req)
+    except urllib.error.HTTPError as e:
+        print('Error code: ', e.code)
+        return e.code
+    except Exception as detail:
+        print("ERROR:", detail)
+        return True
+    return False
+
+def main():
+    """Scrape a public proxy list (category ALL) and print which proxies work."""
+    socket.setdefaulttimeout(120)  # free proxies are slow; allow up to 2 minutes
+#    scrapper = Scrapper(category='GOOGLE', print_err_trace=False)
+    scrapper = Scrapper(category='ALL', print_err_trace=False)
+
+#    print(data)
+    # two sample proxy IPs
+    data = scrapper.getProxies()
+    for d in data.proxies:
+        currentProxy=d.ip+':'+str(d.port)
+        print(d)
+        if is_bad_proxy(currentProxy):
+            print("Bad Proxy %s" % (currentProxy))
+        else:
+            print("%s is working" % (currentProxy))
+
+
+
+if __name__ == '__main__':
+    main() 

+ 15 - 0
choozmo/redis_test.py

@@ -0,0 +1,15 @@
+import redis
+import time
+import json
+import random
+# Ad-hoc inspection script for the 'vtuber' list in redis db 2.
+r = redis.Redis(host='db.ptt.cx', port=6379, db=2,password='choozmo9')
+
+vlen=r.llen('vtuber')
+for i in range(0,vlen ):
+    # NOTE(review): lpop is destructive -- this drains the list while printing
+    # it.  Use lindex (commented out below) to inspect without consuming.
+    print(r.lpop('vtuber'))
+
+#    print(r.lindex('vtuber', i))
+#print(data)
+#r.rpush('vtuber','123')
+#r.rpush('vtuber','456')
+

+ 52 - 0
choozmo/save_search_result.py

@@ -0,0 +1,52 @@
+import gsearch_general
+import sys
+import dataset
+import datetime
+import codecs
+import time
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+table=db['save_result']
+
+driver=gsearch_general.restart_browser()
+fr=codecs.open('c:/tmp/food.csv','r','utf-8')
+lines=fr.readlines()
+fr.close()
+
+namedict={}
+cursor=db.query('select distinct(keyword) as kw from save_result')
+for c in cursor:
+    kw=c['kw'].replace('\n','')
+    kw=c['kw'].replace('\r','')
+    namedict[kw]=1
+
+print(namedict)
+print(lines)
+#time.sleep(9999)
+
+#keyword='檸檬原汁'
+#lines=['芋頭 外帶']
+#lines=['布丁 牛奶 冰']
+#lines=['芋圓 加盟']
+
+for l in lines:
+    l=l.replace('\r','')
+    l=l.replace('\n','')
+    if namedict.get(l) is not None:
+        continue
+    keyword=l
+    print(keyword)
+    idx=gsearch_general.process_query(driver,keyword,number_results=100,language_code='zh-TW',enable_next=False)
+    print(idx)
+    cnt=1
+    for x in idx:
+        x['keyword']=keyword
+        x['dt']=datetime.datetime.now()
+        x['num']=str(cnt)
+        cnt+=1
+        table.insert(x)
+#    if idx==None:
+#        print(driver.page_source)
+    if '我們的系統偵測到您的電腦網路送出的流量有異常情況' in driver.page_source:
+        print('baned.....')
+        sys.exit()
+

+ 104 - 0
choozmo/seo_hhh.py

@@ -0,0 +1,104 @@
+import time
+import json
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+import os
+import urllib.parse
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+import codecs
+import random
+import dataset
+import time
+import traceback
+import sys
+
+driver=None
+headers = {
+        "Authorization": "Bearer " + "t35vhZtWNgvDNWHc3DJh0OKll3mcB9GvC8K2EAkBug2",
+        "Content-Type": "application/x-www-form-urlencoded"
+}
+
+
+def empty_query(q):
+    """Issue a bare Google search for `q` using the module-global driver."""
+    global driver
+    googleurl='https://www.google.com/search?q='+urllib.parse.quote(q)
+    driver.get(googleurl)
+    time.sleep(3)
+
+
+def process_query(qs):
+    q=qs[0]
+    domain=qs[1]
+    global driver
+    googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(urllib.parse.quote(q), 100,'zh-TW')
+    print(googleurl)
+    driver.get(googleurl)
+    time.sleep(6)
+
+    elmts=driver.find_elements_by_xpath("//div[@class='yuRUbf']/a")
+
+    idx=1
+    ranking=-1
+    print(len(elmts))
+#    driver.save_screenshot('c:/tmp/test.png')
+
+    for elmt in elmts:
+
+        href=elmt.get_attribute('href')
+        txt=elmt.text
+        if len(txt)>10:
+            if domain in href:
+                print('clicked....')
+                print(href)
+                print(txt)
+                webdriver.ActionChains(driver).move_to_element(elmt).perform()
+                webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+                break
+
+
+def run_once(q):
+    """Start a fresh headless Chrome, run process_query(q), then quit.
+
+    q: (keyword, domain) tuple forwarded to process_query().
+    """
+    global driver
+    print('run_once()')
+    result=[]  # NOTE(review): unused
+    options = webdriver.ChromeOptions()
+    options.add_argument('--headless')
+#    options.add_argument("--user-agent=" +user_agent)
+#    options.add_argument("--incognito")
+    options.add_argument('--no-sandbox')
+    options.add_argument("--disable-gpu")
+    options.add_argument('--disable-dev-shm-usage')
+    driver = webdriver.Chrome(
+    options=options)
+
+    driver.delete_all_cookies()
+    driver.set_window_size(1400,1000)
+
+    print(q)
+    process_query(q)
+    time.sleep(3)
+    driver.quit()
+
+
+#lst=[{'kw':'幸福空間','domain':'hhh.com.tw','page':0}]
+# Load the hhh-faq click jobs once, then click a random one forever,
+# pausing 290-420s between runs.
+lst=[]
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+
+cursor=db.query('SELECT kw as term,domain FROM seo.seo_clickjobs where category="hhh-faq"  order by rand()')
+
+for c in cursor:
+    lst.append(c)
+
+
+#for c in lst:
+while True:
+    try:
+        c=random.choice(lst)
+        run_once( (c['term'],c['domain'])   )
+    except:
+        traceback.print_exc()
+    sleepint=random.randint(290,420)
+    time.sleep(sleepint)
+

+ 50 - 0
choozmo/seo_routine.py

@@ -0,0 +1,50 @@
+import gsearch_general
+import sys
+import dataset
+import datetime
+import codecs
+import time
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+table=db['save_result']
+
+driver=gsearch_general.restart_browser()
+#fr=codecs.open('c:/tmp/food.csv','r','utf-8')
+#lines=fr.readlines()
+#fr.close()
+
+#namedict={}
+lines=[]
+cursor=db.query('SELECT term FROM seo.selected_kw where term not in (SELECT keyword FROM seo.save_result where datediff(now(),dt)=0);')
+for c in cursor:
+    kw=c['term'].replace('\n','')
+    kw=c['term'].replace('\r','')
+    lines.append(kw)
+
+print(lines)
+#time.sleep(9999)
+
+#keyword='檸檬原汁'
+#lines=['芋頭 外帶']
+#lines=['布丁 牛奶 冰']
+#lines=['芋圓 加盟']
+
+for l in lines:
+    l=l.replace('\r','')
+    l=l.replace('\n','')
+    keyword=l
+    print(keyword)
+    idx=gsearch_general.process_query(driver,keyword,number_results=100,language_code='zh-TW',enable_next=False)
+    print(idx)
+    cnt=1
+    for x in idx:
+        x['keyword']=keyword
+        x['dt']=datetime.datetime.now()
+        x['num']=str(cnt)
+        cnt+=1
+        table.insert(x)
+#    if idx==None:
+#        print(driver.page_source)
+    if '我們的系統偵測到您的電腦網路送出的流量有異常情況' in driver.page_source:
+        print('baned.....')
+        sys.exit()
+

+ 158 - 0
choozmo/serp_searchhome.py

@@ -0,0 +1,158 @@
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+import os
+import datetime
+import urllib.parse
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+import codecs
+import random
+from bs4 import BeautifulSoup
+import requests
+import time
+import rpyc
+import sys
+import docker
+import  googlesearch
+import codecs
+import sys
+import time
+import dataset
+import os
+
+
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/hhh?charset=utf8mb4')
+
+#cursor=db.query('SELECT kw FROM hhh.hhh_contentgap_serp where ranking is not null;')
+#cursor=db.query('SELECT kw FROM hhh.hhh_contentgap_serp where kw not in (select distinct kw from hhh_contentgap_serp where id >= 155)')
+
+kwlst={}
+#for c in cursor:
+#    kwlst[c['kw']]=1
+
+
+
+
+
+table=db['searchome_contentgap_serp']
+curdir=os.path.realpath('.')
+
+#fr=codecs.open(curdir+os.sep+'contentgap.txt','r','utf-8')
+#fr=codecs.open(curdir+os.sep+'hhh\\seo\\contentgap.txt','r','utf-8')
+#fr=codecs.open('C:\\gitlab\\kw_tools\\kw_tools\\hhh\\SEO\\contentgap.txt','r','utf-8')
+#lines=fr.readlines()
+lst=[]
+#for l in lines:
+#    lst.append(l.replace('\n',''))
+cursor=db.query('select distinct kw from  searchome_contentgap_serp where ranking is null')
+for c in cursor:
+    lst.append(c['kw'])
+
+
+
+headers = {
+        "Authorization": "Bearer " + "t35vhZtWNgvDNWHc3DJh0OKll3mcB9GvC8K2EAkBug2",
+        "Content-Type": "application/x-www-form-urlencoded"
+}
+
+def send_msg(kw):
+    """Push a LINE Notify message naming the keyword being processed."""
+    params = {"message": "處理關鍵字: "+kw}  
+    r = requests.post("https://notify-api.line.me/api/notify",headers=headers, params=params)
+
+
+def empty_query(q):
+    """Issue a bare Google search for `q` using the module-global driver."""
+    global driver
+    googleurl='https://www.google.com/search?q='+urllib.parse.quote(q)
+    driver.get(googleurl)
+    time.sleep(3)
+
+
+def process_query(qs,number_results=10,language_code='en',pat='searchome.net'):
+    """Search Google for `qs` and return the 0-based rank of the first result
+    whose URL contains `pat`, scanning at most the first two result pages.
+
+    Returns None when the "next page" link cannot be clicked, and returns
+    None implicitly when `pat` is not found on page two either.
+    Uses the module-global `driver`.
+    """
+    global driver
+    escaped_search_term=urllib.parse.quote(qs)
+#    escaped_search_term = qs.replace(' ', '+')
+#    googleurl='https://www.google.com/search?q='+
+    googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(escaped_search_term, number_results+1,language_code)
+
+    driver.get(googleurl)
+    # Organic result anchors on page one.
+    elmts=driver.find_elements_by_xpath("//div[@class='yuRUbf']/a")
+    idx=0
+    for elmt in elmts:
+        try:
+            href=elmt.get_attribute('href')
+            print(str(idx)+': '+href)
+            if pat in href:
+                return idx
+            idx+=1
+        except:
+            print('href exception')
+
+    try:
+        # Advance to page two via the pager link.
+        elmt=driver.find_element_by_xpath("//a[@id='pnnext']")
+        webdriver.ActionChains(driver).move_to_element(elmt).perform()
+        webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+    except:
+        print('pnnext exception')
+        return None
+
+    time.sleep(4)
+    elmts=driver.find_elements_by_xpath("//div[@class='yuRUbf']/a")
+
+    # Continue counting `idx` across pages so the returned rank is global.
+    for elmt in elmts:
+        try:
+            href=elmt.get_attribute('href')
+            print(str(idx)+': '+href)
+            if pat in href:
+                return idx
+            idx+=1
+
+        except:
+            print('href2 exception')
+
+
+result=[]
+driver=None
+
+def restart_browser():
+    """Create a fresh local Chrome session; the docker/remote variants are
+    kept commented out for reference.  Returns the driver, 1400x1000.
+    """
+#    client = docker.from_env()
+#    ls=client.containers.list()
+#    print(ls)
+#    ls[0].restart()
+    time.sleep(10)  # NOTE(review): pause presumably left over from the docker-restart path -- confirm
+
+    options = webdriver.ChromeOptions()
+#    options.add_argument("--proxy-server=socks5://191.96.42.80:1080")
+
+    driver=webdriver.Chrome(desired_capabilities=options.to_capabilities())
+#    driver = webdriver.Remote(
+#        command_executor='http://127.0.0.1:4444/wd/hub',
+#        command_executor='http://172.104.93.163:4444/wd/hub', 
+    #command_executor='http://dev2.choozmo.com:14444/wd/hub',
+#    desired_capabilities=options.to_capabilities())
+#    desired_capabilities=DesiredCapabilities.CHROME)
+    driver.set_window_size(1400,1000)
+    return driver
+
+for l in lst:
+#for l in lst[2:]:
+    if True:
+#    if kwlst.get(l) is None:
+        driver=restart_browser()
+
+    #    l='房間 油漆'
+    #    idx=process_query(,number_results=100,language_code='zh-TW',pat='hhh.com.tw')
+#        idx=process_query(l,number_results=100,language_code='zh-TW',pat='hhh.com.tw')
+        idx=process_query(l,number_results=100,language_code='zh-TW',pat='searchome.net')
+
+#        table.insert({'kw':l,'ranking':idx,'dt':datetime.datetime.now()})
+        table.upsert({'kw':l,'ranking':idx,'dt':datetime.datetime.now()},['kw'])
+
+        print({'kw':l,'ranking':idx})
+        db.commit()
+        driver.quit()
+    #    time.sleep(9999)
+
+#        time.sleep(4)

+ 22 - 5
choozmo/term_get_email.py

@@ -13,15 +13,21 @@ import random
 from bs4 import BeautifulSoup
 import requests
 import time
-import rpyc
+# import rpyc
 import sys
 import docker
 # import  googlesearch
 import codecs
 import sys
 import time
 import dataset
 import os
+import pymysql
+pymysql.install_as_MySQLdb()
 
 def process_one(driver):
     lst=[]
@@ -49,7 +55,6 @@ def process_query(driver,url):
         txt=txt.replace('mailto:','')
         if 'mailto:?subject=' in txt:
             return None
-
         return txt
     except:
         print('not found')
@@ -67,18 +72,28 @@ def process_query(driver,url):
 
 result=[]
 driver=None
+path = '/Users/zooeytsai/Downloads/chromedriver'
 
 def restart_browser():
 #    os.system('docker container restart p4444')
 #    time.sleep(10)
 
     options = webdriver.ChromeOptions()
+    options.add_argument("--headless")
     options.add_argument("start-maximized")
+<<<<<<< HEAD
     options.add_argument('user-data-dir=C:\\Users\\jared\\AppData\\Local\\Google\\Chrome\\User Data')
     # options.add_argument('--profile-directory=Profile 77')
     # options.add_argument('--profile-directory=Default')
 
     driver=webdriver.Chrome(options=options,executable_path='C:\\Users\\user\\Downloads\\chromedriver_99\\chromedriver')
+=======
+    options.add_argument('user-data-dir=/Users/zooeytsai/Library/Application Support/Google/Chrome/Default')
+#    options.add_argument('--profile-directory=Profile 77')
+    options.add_argument('--profile-directory=Default')
+
+    driver=webdriver.Chrome(options=options,executable_path=path)
+>>>>>>> 604b4f0737fe7f055de28e84a0a4697a346701cf
     #driver = webdriver.Remote(
     #    command_executor='http://127.0.0.1:4444/wd/hub',
     #desired_capabilities=options.to_capabilities())
@@ -87,15 +102,17 @@ def restart_browser():
     return driver
 
 db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
-cursor=db.query('select title,url,tag from term_gsearch where url not in (select url from term_progress) order by rand()')
+# cursor=db.query('select title,url,tag from term_gsearch where url not in (select url from term_progress) and tag like "區塊鏈" order by rand()')
+cursor=db.query('select title,url,tag from term_gsearch where tag like "區塊鏈" order by rand()')
 lst=[]
 for c in cursor:
     lst.append(c)
-    
+
 table=db['term_progress']
 driver=restart_browser()
 for c in lst:
     email=process_query(driver,c['url'])
+    print(email)
     c['title']=c['title'].replace('聯絡我們 - ','')
     c['title']=c['title'].replace('聯絡我們-','')
     c['title']=c['title'].replace('聯絡我們|','')
@@ -106,4 +123,4 @@ for c in lst:
     c['title']=c['title'].replace('聯絡我們','')
 
     table.insert({'title':c['title'],'url':c['url'],'email':email,'tag':c['tag']})
-#    time.sleep(3)
+    time.sleep(2)

+ 10 - 0
choozmo/term_gsearch.py

@@ -124,7 +124,12 @@ driver=gsearch_general.restart_browser()
 #idx=gsearch_general.process_query(driver,'聯絡我們 mail 命理',number_results=100,language_code='zh-TW')
 #idx=gsearch_general.process_query(driver,'聯絡我們 mail 改運',number_results=100,language_code='zh-TW')
 # idx=gsearch_general.process_query(driver,'聯絡我們 mail 風水',number_results=100,language_code='zh-TW')
+<<<<<<< HEAD
 idx=gsearch_general.process_query(driver,'聯絡我們 mail 公益',number_results=100,language_code='zh-TW')
+=======
+# idx=gsearch_general.process_query(driver,'聯絡我們 mail 公益',number_results=100,language_code='zh-TW')
+idx=gsearch_general.process_query(driver,'聯絡我們 區塊鏈',number_results=100,language_code='zh-TW')
+>>>>>>> 604b4f0737fe7f055de28e84a0a4697a346701cf
 
 
 
@@ -133,6 +138,11 @@ idx=gsearch_general.process_query(driver,'聯絡我們 mail 公益',number_resul
 print(idx)
 for x in idx:
     x['dt']=datetime.datetime.now()
+<<<<<<< HEAD
+=======
+    x['tag']='區塊鏈2'
+    print(x)
+>>>>>>> 604b4f0737fe7f055de28e84a0a4697a346701cf
     table.insert(x)
 
 if idx==None:

+ 11 - 0
choozmo/test.py

@@ -0,0 +1,11 @@
+import dataset
+import codecs
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+
+
+#cursor=db.query('select keyword from save_result')
+cursor=db.query('select * from selected_kw')
+
+for c in cursor:
+    print(c)
+

+ 121 - 0
choozmo/watch_yt.py

@@ -0,0 +1,121 @@
+import time
+import json
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+import redis
+import os
+import urllib.parse
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.common.keys import Keys
+import codecs
+import random
+import requests
+import datetime
+import dataset
+import time
+import traceback
+import sys
+import fire
+from userAgentRandomizer import userAgents
+
+
+
+
+def init_browser(proxy1='proxy1',tiny1='tiny1',socks5="9050",debug='9922',cdriver=''):
+    os.system('docker container restart '+proxy1)
+    os.system('docker container restart '+tiny1)
+    ua = userAgents()
+    user_agent = ua.random()        
+
+    time.sleep(12)
+    options = webdriver.ChromeOptions()
+    options.add_argument('--headless')
+#    options.add_argument('--proxy-server=socks5://127.0.0.1:'+socks5)
+    options.add_argument('--disable-dev-shm-usage')
+    options.add_argument('--no-sandbox')
+    options.add_argument("--user-agent=" +user_agent)
+
+    options.add_experimental_option("debuggerAddress", "127.0.0.1:"+debug)
+
+#    options.add_experimental_option("debuggerAddress", "192.168.192.45:9922")
+#    options.add_experimental_option("debuggerAddress", q[2])
+
+#    options.add_argument("--user-agent=" +user_agent)
+    options.add_argument("--incognito")
+#    driver = webdriver.Chrome(executable_path=r'C:\portable\webdriver\chrome98\chromedriver.exe',options=options)
+    driver = webdriver.Chrome(executable_path=cdriver,options=options)
+
+#    driver = webdriver.Chrome(executable_path=epath,options=options)
+
+    driver.delete_all_cookies()
+    driver.set_window_size(1400,1000)
+    return driver
+    print(q)
+    process_query(q)
+
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+yt_lst=[]
+#ace YAt1PdQTp4Q
+#choozmo 'YIftavPmHxo','K5DEJXajtqA'
+#cursor=['LmWkYVV7wtU','pIPs1n7PTMk','K5DEJXajtqA','YIftavPmHxo','UeZjaenjkx4','YAt1PdQTp4Q','pWx4Sb31JZU','80MN3fcbNKk','OLAzzz-V0p4']
+
+
+r = redis.Redis(host='db.ptt.cx', port=6379, db=2,password='choozmo9')
+
+data=r.get('watch_yt')
+jstext=data.decode('utf-8')
+jsobj=json.loads(jstext)
+
+#js=random.choice(jsobj)
+
+cursor=jsobj
+#cursor=['IsUte9FeL-0','7SsOUn3ufgw','YAt1PdQTp4Q','lnROUcxy6W0','Hf3uts9fO54','YIftavPmHxo','K5DEJXajtqA']
+#cursor=db.query('select * from sbir_yt')
+#for c in cursor:
+#    yt_lst.append(c['url'])
+yt_lst=cursor
+class JParams(object):
+
+  def get(self, yt,proxy1,tiny1,socks5,debug,cdriver):
+    if 'SBIR' in yt:    
+        yt=random.choice(yt_lst)
+        url='https://www.youtube.com/watch?v='+yt
+        print(url)
+    driver=init_browser(proxy1='proxy1',tiny1='tiny1',socks5="9050",debug='9922',cdriver='/root/webdriver/98/chromedriver')
+    #driver.get('https://www.youtube.com/watch?v=K5DEJXajtqA')
+    try:
+        driver.get(url)
+    except:
+        driver.quit()
+        sys.exit()
+        return
+    video = driver.find_element_by_id('movie_player')
+    video.send_keys(Keys.SPACE) #hits space
+    time.sleep(1)
+    video.click()               #mouse click
+
+    driver.execute_script('window.open("'+url+'","_blank");')
+    driver.execute_script("window.scrollTo(0, window.scrollY + 400)")
+    time.sleep(3)
+
+    try:
+        driver.get(url)
+    except:
+        driver.quit()
+        sys.exit()
+        return
+
+    time.sleep(35)
+    driver.quit()
+    sys.exit()
+
+
+
+
+if __name__ == '__main__':
+  fire.Fire(JParams)
+
+

+ 7 - 0
choozmo/yt1.sh

@@ -0,0 +1,7 @@
+#!/bin/bash
+while :
+do
+    python3 watch_yt.py get --cdriver=/root/webdriver/98/chromedriver --yt=SBIR --proxy1=proxy1 --tiny1=tiny1 --socks5=9050 --debug=9922
+	sleep 320
+done
+

+ 7 - 0
choozmo/yt2.sh

@@ -0,0 +1,7 @@
+#!/bin/bash
+while :
+do
+    python3 watch_yt.py get --cdriver=/root/webdriver/98/chromedriver --yt=YAt1PdQTp4Q --proxy1=proxy2 --tiny1=tiny2 --socks5=9052 --debug=9925
+	sleep 1
+done
+

+ 8 - 0
choozmo/yt3.sh

@@ -0,0 +1,8 @@
+#!/bin/bash
+while :
+do
+    python3 watch_yt.py get --cdriver=/root/webdriver/98/chromedriver --yt=YAt1PdQTp4Q --proxy1=proxy2 --tiny1=tiny3 --socks5=9053 --debug=9926
+
+	sleep 1
+done
+

+ 7 - 0
choozmo/yt4.sh

@@ -0,0 +1,7 @@
+#!/bin/bash
+while :
+do
+    python3 watch_yt.py get --cdriver=/root/webdriver/98/chromedriver --yt=YAt1PdQTp4Q --proxy1=proxy4 --tiny1=tiny4 --socks5=9054 --debug=9927
+	sleep 1
+done
+

+ 147 - 0
click_and_notify/click_and_not.py

@@ -0,0 +1,147 @@
+#import redis
+import time
+#import json
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+import os
+import urllib.parse
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+import dataset
+import codecs
+import random
+import requests
+import time
+import sys
+import docker
+import codecs
+import random
+import os
+import time
+from datetime import datetime
+
+
+driver=None
+click_times = 0
+headers = {
+        "Authorization": "Bearer " + "WekCRfnAirSiSxALiD6gcm0B56EejsoK89zFbIaiZQD",
+        "Content-Type": "application/x-www-form-urlencoded"
+}
+# headers = {
+#         "Authorization": "Bearer " + "t35vhZtWNgvDNWHc3DJh0OKll3mcB9GvC8K2EAkBug2",
+#         "Content-Type": "application/x-www-form-urlencoded"
+# }
+
+def send_msg(kw):
+    """Push a LINE Notify message naming the keyword being processed."""
+    params = {"message": "處理關鍵字: "+kw}  
+    r = requests.post("https://notify-api.line.me/api/notify",headers=headers, params=params)
+
+
+def empty_query(q):
+    """Issue a bare Google search for `q` using the module-global driver."""
+    global driver
+    googleurl='https://www.google.com/search?q='+urllib.parse.quote(q)
+    driver.get(googleurl)
+    time.sleep(3)
+
+
+def process_query(qs):
+    print('processing...')
+    print(qs)
+    q=qs[0]
+    domain=qs[2]
+    cnt=qs[1]
+    global driver
+    global click_times
+    googleurl='https://www.google.com/search?q='+urllib.parse.quote(q)
+    driver.get(googleurl)
+    time.sleep(3)
+    if cnt > 0:
+        for i in range(cnt):
+            elmt=driver.find_element_by_xpath("//a[@id='pnnext']")
+            webdriver.ActionChains(driver).move_to_element(elmt).perform()
+            webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+    time.sleep(2)
+
+    elmts=driver.find_elements_by_xpath("//div[@class='g']//div[@class='yuRUbf']//a")
+    idx=1
+    ranking=-1
+    for elmt in elmts:
+        href=elmt.get_attribute('href')
+        txt=elmt.text
+        if len(txt)>10:
+            if domain in href:
+                webdriver.ActionChains(driver).move_to_element(elmt).perform()
+                webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+                break
+
+    click_times = click_times +1
+    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/test?charset=utf8mb4')
+    table = db['click_times']
+    table.update({'id':0,'time':datetime.now().strftime('%Y-%m-%d %H:%M:%S'),'click_time':click_times}, ['id'])
+    print("click times = ",click_times)
+    hour = datetime.now().strftime('%H')
+    if hour== 23:
+        click_times = 0
+        table.update({'id':0,'time':datetime.now().strftime('%Y-%m-%d %H:%M:%S'),'click_time':0}, ['id'])
+
+
+
+def re_get_webdriver():
+    """Restart the 'p4444' selenium container and reconnect the global driver.
+
+    Sets the module-global `driver` to a fresh Remote session on success, or
+    to None on failure (also returned as None).
+    """
+    global driver
+    result=[]  # NOTE(review): unused
+    client = docker.from_env()
+    ls=client.containers.list()
+    print(ls)
+    for l in ls:
+        print(l.name)
+        if 'p4444' in l.name:
+            l.restart()
+            print('restarted')
+            time.sleep(6)  # wait for the grid to come back up
+        else:
+            print('name not correct')
+
+#    options = webdriver.EdgeOptions()
+    try:
+        print('trying 4444....')
+        options = webdriver.ChromeOptions()
+        driver = webdriver.Remote(
+            command_executor='http://127.0.0.1:4444/wd/hub',desired_capabilities=options.to_capabilities())
+        print('4444 done')
+        driver.set_window_size(1400,1000)
+        print('driver is fine')
+        return
+    except:
+        print('driver except')
+        driver=None
+        return None
+
+
+def run_once(url):
+    """Run one click job; occasionally (~1 in 5) recycle the browser first.
+
+    url: the (keyword, page, domain) tuple handed to process_query().
+    """
+    global driver
+    i=random.randint(0,20)
+    if i<=3 or driver is None:
+        re_get_webdriver()
+    if driver is None:
+        print('driver is none')
+        return
+    try:
+        process_query(url)
+    except:
+        print('process_query exception')
+
+
+
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/test?charset=utf8mb4')
+
+# Load the 清原 click jobs once, then run random ones back-to-back forever.
+cursor=db.query('select kw,page,domain from click_and_notify where category="清原"')
+lst=[]
+for c in cursor:
+    lst.append((c['kw'],c['page'],c['domain']))
+
+while True:
+    l=random.choice(lst)
+    run_once( l )
+    time.sleep(0.001)

+ 8 - 0
console/google_status.py

@@ -0,0 +1,8 @@
+import dataset
+
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/google_poi?charset=utf8mb4')
+cursor=db.query("SELECT count(*) as cnt FROM google_poi.shop_list3;")
+
+for c in cursor:
+    print(c['cnt'])
+    break

+ 24 - 0
console/hhh_start_process.py

@@ -0,0 +1,24 @@
+import rpyc
+import os
+# RDP HOME
+conn = rpyc.classic.connect("192.168.192.58",18812)
+conn.execute('import os')
+conn.execute('import subprocess')
+conn.execute('import psutil')
+
+rsubprocess=conn.modules.subprocess
+
+rpsutil=conn.modules.psutil
+#processlist=list()
+for process in rpsutil.process_iter():
+    try:
+        print(process.name())
+        print(process.exe())
+        print(process.username())
+    except:
+        print('except')
+#    print(process)
+
+#rsubprocess.Popen(['python', 'C:/gitlab/kw_tools/hhh/SEO/local_1777.py'],cwd=r'C:\gitlab\kw_tools\hhh\SEO')
+
+

+ 10 - 0
console/restart_seo_tiny_docker.py

@@ -0,0 +1,10 @@
+import rpyc
+import os
+conn = rpyc.classic.connect("192.168.192.45",18812)
+conn.execute('import os')
+ros = conn.modules.os
+ros.system('docker restart tiny1')
+#ros.system('docker restart 62eab82f7759') 
+#print('after restart')
+#time.sleep(11)
+#print('after wait')

+ 15 - 0
docker/alpine-basic/Dockerfile

@@ -0,0 +1,15 @@
+FROM python:alpine3.14
+# update apk repo
+RUN echo "http://dl-4.alpinelinux.org/alpine/v3.14/main" >> /etc/apk/repositories && \
+    echo "http://dl-4.alpinelinux.org/alpine/v3.14/community" >> /etc/apk/repositories
+RUN apk update
+RUN apk add build-base
+RUN apk add libffi-dev
+RUN apk add mariadb-dev
+RUN apk add chromium chromium-chromedriver
+RUN apk add bash
+RUN apk add tmux
+RUN pip3 install --upgrade pip
+RUN pip3 install dataset
+RUN pip3 install mysqlclient
+RUN pip3 install selenium

+ 3 - 0
docker/alpine-basic/script.txt

@@ -0,0 +1,3 @@
+docker tag alpine-basic choozmodocker/alpine-basic:latest
+docker push choozmodocker/alpine-basic:latest
+docker run -it --name poi -v c:/gitlab/GooglePoiCrawler:/app choozmodocker/alpine-chrome:latest /bin/bash

+ 23 - 0
docker/alpine-chrome/Dockerfile

@@ -0,0 +1,23 @@
+FROM python:alpine3.14
+
+# update apk repo
+RUN echo "http://dl-4.alpinelinux.org/alpine/v3.14/main" >> /etc/apk/repositories && \
+    echo "http://dl-4.alpinelinux.org/alpine/v3.14/community" >> /etc/apk/repositories
+
+# install chromedriver
+RUN apk update
+RUN apk add build-base
+RUN apk add libffi-dev
+RUN apk add mariadb-dev
+RUN apk add chromium chromium-chromedriver
+RUN apk add tmux
+# upgrade pip
+RUN pip3 install --upgrade pip
+RUN pip3 install bs4
+RUN pip3 install selenium-wire 
+RUN pip3 install dataset
+RUN pip3 install mysqlclient
+RUN pip3 install selenium
+RUN pip3 install pandas
+RUN pip3 install pymysql
+RUN apk add bash

+ 3 - 0
docker/alpine-chrome/script.txt

@@ -0,0 +1,3 @@
+docker tag alpine-chrome choozmodocker/alpine-chrome:latest
+docker push choozmodocker/alpine-chrome:latest
+docker run -it --name poi -v c:/gitlab/GooglePoiCrawler:/app choozmodocker/alpine-chrome:latest /bin/bash

+ 4 - 0
docker/alpine-hhh/Dockerfile

@@ -0,0 +1,4 @@
+FROM choozmodocker/alpine-basic:latest
+RUN pip3 install userAgentRandomizer
+ENTRYPOINT ["python3", "/app/local_1777.py"]
+ 

+ 8 - 0
docker/alpine-poi/Dockerfile

@@ -0,0 +1,8 @@
+FROM choozmodocker/alpine-basic:latest
+RUN pip3 install bs4
+RUN pip3 install selenium-wire 
+RUN pip3 install pandas
+RUN pip3 install redis
+RUN pip3 install pymysql
+RUN pip3 install requests
+ENTRYPOINT ["python3", "/app/run4.py"]

+ 6 - 0
docker/alpine-poi/script.txt

@@ -0,0 +1,6 @@
+docker tag alpine-poi choozmodocker/alpine-poi:latest
+docker push choozmodocker/alpine-poi:latest
+docker run -it --name poi1 -v c:/gitlab/GooglePoiCrawler:/app choozmodocker/alpine-poi:latest
+
+
+docker run -it --name poi -v c:/gitlab/GooglePoiCrawler:/app alpine-poi

+ 3 - 0
docker/alpine-seo/Dockerfile

@@ -0,0 +1,3 @@
+FROM choozmodocker/alpine-basic:latest
+ENTRYPOINT ["python3", "/app/click_commerce.py"]
+

+ 3 - 0
docker/alpine-test/Dockerfile

@@ -2,8 +2,11 @@ FROM joyzoursky/python-chromedriver:3.9-alpine-selenium
 RUN apk add bash
 RUN apk add --virtual build-deps gcc python3-dev musl-dev
 RUN apk add --no-cache mariadb-dev
+RUN apk add libffi-dev
 RUN apk add tmux
 RUN pip3 install --upgrade pip
+RUN pip3 install bs4
+RUN pip3 install selenium-wire
 RUN pip3 install dataset
 RUN pip3 install mysqlclient
 

+ 27 - 0
docker/chrome99-test/Dockerfile

@@ -0,0 +1,27 @@
+FROM selenium/standalone-chrome:99.0
+RUN sudo apt-get update
+RUN sudo apt-get -y install gcc python3-dev libmariadb-dev
+RUN sudo apt-get -y install python3
+RUN sudo apt-get -y install python3-pip
+RUN sudo apt-get -y install tmux
+RUN sudo pip3 install --upgrade pip
+RUN sudo pip3 install bs4
+RUN sudo pip3 install selenium-wire 
+RUN sudo pip3 install dataset
+RUN sudo pip3 install mysqlclient
+RUN sudo pip3 install pandas
+RUN sudo pip3 install pymysql
+
+#RUN apk add bash
+#RUN apk add --no-cache mariadb-dev
+#RUN apk add libffi-dev
+#RUN apk add tmux
+#RUN pip3 install --upgrade pip
+#RUN pip3 install bs4
+#RUN pip3 install selenium-wire
+#RUN pip3 install dataset
+#RUN pip3 install mysqlclient
+
+#RUN pip3 install selenium-wire
+CMD ["python3"]
+

+ 63 - 0
docker/gat/package.json

@@ -0,0 +1,63 @@
+{
+  "name": "cheatsheets",
+  "description": "Devhints.io",
+  "version": "1.0.0",
+  "author": "Rico Sta. Cruz <rstacruz@users.noreply.github.com>",
+  "dependencies": {
+    "autoprefixer": "^9.8.2",
+    "dom101": "^2.2.1",
+    "hint.css": "^2.6.0",
+    "isotope-layout": "^3.0.6",
+    "lodash.noop": "^3.0.1",
+    "modularscale-sass": "^3.0.10",
+    "onmount": "^1.3.0",
+    "postcss-modules": "^2.0.0",
+    "prismjs": "^1.20.0",
+    "sanitize.css": "^11.0.1",
+    "sass": "^1.26.8"
+  },
+  "devDependencies": {
+    "@babel/core": "^7.10.2",
+    "@babel/preset-env": "^7.10.2",
+    "@rstacruz/prettier-plugin-markdown-code-fences": "^1.0.0",
+    "jest": "26.0.1",
+    "jest-html": "1.5.0",
+    "netlify-plugin-minify-html": "^0.2.3",
+    "npm-run-all": "^4.1.5",
+    "parcel-bundler": "^1.12.4",
+    "prettier": "^2.0.5",
+    "wait-on": "^5.0.1"
+  },
+  "homepage": "https://devhints.io/",
+  "jest": {
+    "snapshotSerializers": [
+      "<rootDir>/node_modules/jest-html"
+    ]
+  },
+  "license": "MIT",
+  "main": "index.js",
+  "private": true,
+  "repository": "https://github.com/rstacruz/cheatsheets.git",
+  "scripts": {
+    "build": "run-s -s 'parcel:*:build' jekyll:build",
+    "dev": "run-p -sl jekyll:watch 'parcel:*:watch'",
+    "jekyll:build": "bundle exec jekyll build",
+    "jekyll:watch": "wait-on assets/packed/app.js && wait-on _includes/2017/critical/critical-sheet.css && bundle exec jekyll serve --safe --trace --drafts --watch --incremental --host ${HOST:-0.0.0.0} --port ${PORT:-3000}",
+    "jest-html": "jest-html",
+    "parcel:app:build": "parcel build '_parcel/app.js' -d assets/packed --no-source-maps --no-autoinstall",
+    "parcel:app:watch": "parcel watch '_parcel/app.js' -d assets/packed --no-source-maps --no-autoinstall",
+    "parcel:build": "run-s -s 'parcel:*:build'",
+    "parcel:critical:build": "parcel build '_parcel/critical*.js' -d _includes/2017/critical --no-source-maps --no-autoinstall",
+    "parcel:critical:watch": "parcel watch '_parcel/critical*.js' -d _includes/2017/critical --no-source-maps --no-autoinstall",
+    "predev": "rm -rf assets/packed _includes/2017/critical",
+    "prejekyll:build": "bundle",
+    "prejekyll:watch": "bundle",
+    "prettier:format": "prettier --write '_parcel/**/*.{js,scss}'",
+    "test": "jest",
+    "test:smoke": "bash _support/smoke_test.sh"
+  },
+  "volta": {
+    "node": "16.14.2",
+    "yarn": "1.22.18"
+  }
+}

+ 19 - 0
docker/headless-clickbot/Dockerfile

@@ -0,0 +1,19 @@
+FROM python:3.9.13-alpine3.16
+RUN echo "http://dl-4.alpinelinux.org/alpine/v3.16/main" >> /etc/apk/repositories && \
+    echo "http://dl-4.alpinelinux.org/alpine/v3.16/community" >> /etc/apk/repositories
+RUN apk update
+RUN apk add build-base
+RUN apk add libffi-dev
+RUN apk add mariadb-dev
+RUN apk add chromium chromium-chromedriver
+RUN apk add bash
+RUN apk add tmux
+RUN pip3 install --upgrade pip
+RUN pip3 install dataset
+RUN pip3 install mysqlclient
+RUN pip3 install selenium
+
+RUN pip3 install selenium
+RUN pip3 install redis
+RUN pip3 install requests
+ENTRYPOINT ["python3", "/app/general_clickbot_hhh.py"]

+ 4 - 0
docker/hhh-backstage-docker/Dockerfile

@@ -0,0 +1,4 @@
+FROM shengeih/nginx-php7:v2
+RUN sed -i '107s/^.*$/pm.max_children = 25/' /opt/docker/etc/php/fpm/pool.d/application.conf
+
+

Alguns arquivos não foram mostrados porque muitos arquivos mudaram nesse diff