2024-05-10 23:15:29 +08:00
|
|
|
from celery import shared_task
|
2024-09-24 16:08:28 +08:00
|
|
|
from webdriver.utils import setup_webdriver, selenium_action_template, google_search, get_element, get_elements
|
|
|
|
from selenium.webdriver.common.by import By
|
|
|
|
from search_results.tasks import create_search_result
|
2024-05-10 23:15:29 +08:00
|
|
|
|
|
|
|
|
2024-09-24 16:08:28 +08:00
|
|
|
# Task template
|
|
|
|
@shared_task(autoretry_for=(Exception,), retry_kwargs={'max_retries': 3, 'countdown': 5})
def sample_selenium_task():
    """Template Celery task that drives a Selenium browser session.

    Starts a webdriver with no proxy and no saved session, loads the
    placeholder URL, and runs ``selenium_action_template`` against it.
    Any exception makes Celery retry the task automatically (at most 3
    retries, countdown of 5), as configured on the decorator.
    """
    driver = setup_webdriver(use_proxy=False, use_saved_session=False)
    try:
        driver.get("Place URL here")
        selenium_action_template(driver)
        # TODO: Modify this as needed
    finally:
        # Once completed, always close the session -- including when a step
        # above raises. Without this, every automatic retry would start a
        # new browser while the failed attempt's browser stays open.
        try:
            driver.close()
        finally:
            # Run quit() even if close() itself fails.
            driver.quit()
|
2024-09-24 16:08:28 +08:00
|
|
|
|
|
|
|
# Sample task to scrape Google for search results based on a keyword
|
|
|
|
|
|
|
|
|
|
|
|
@shared_task(autoretry_for=(Exception,), retry_kwargs={'max_retries': 3, 'countdown': 5})
def simple_google_search():
    """Scrape Google results for a fixed search term and queue them for storage.

    Opens Google in a fresh webdriver (no proxy, no saved session), searches
    for "cat blog posts", reads the title (first ``h3``) and link (first
    ``a`` href) of each result element, and enqueues one
    ``create_search_result`` task per result.
    Any exception makes Celery retry the task automatically (at most 3
    retries, countdown of 5), as configured on the decorator.
    """
    driver = setup_webdriver(use_proxy=False, use_saved_session=False)
    results = []
    try:
        driver.get("https://google.com/")
        google_search(driver, search_term="cat blog posts")

        # Collect the elements that make up the Google search results
        search_items = get_elements(
            driver, "xpath", '//*[@id="search"]/div[1]/div[1]/*')

        for item in search_items:
            title = item.find_element(By.TAG_NAME, 'h3').text
            link = item.find_element(By.TAG_NAME, 'a').get_attribute('href')
            results.append({"title": title, "link": link})
    finally:
        # Always end the session, even when scraping fails; otherwise each
        # automatic retry would start a new browser while the old one stays
        # open.
        try:
            driver.close()
        finally:
            # Run quit() even if close() itself fails.
            driver.quit()

    # Dispatch only after the whole page was scraped successfully. If results
    # were queued inside the loop, a failure partway through would trigger a
    # retry that enqueues the earlier results a second time.
    for result in results:
        create_search_result.apply_async(kwargs=result)
|