mirror of
https://github.com/lemeow125/DRF_Template.git
synced 2025-06-29 00:25:44 +08:00
Update webdriver utility functions and add a sample celery task for scraping Google search
This commit is contained in:
parent
0e902b1f04
commit
9e2e32fba8
11 changed files with 193 additions and 25 deletions
|
@ -1,16 +1,43 @@
|
|||
from celery import shared_task
|
||||
from webdriver.utils import setup_webdriver, selenium_action_template
|
||||
|
||||
# Sample Celery Selenium function
|
||||
# TODO: Modify this as needed
|
||||
from webdriver.utils import setup_webdriver, selenium_action_template, google_search, get_element, get_elements
|
||||
from selenium.webdriver.common.by import By
|
||||
from search_results.tasks import create_search_result
|
||||
|
||||
|
||||
@shared_task(autoretry_for=(Exception,), retry_kwargs={'max_retries': 6, 'countdown': 5})
|
||||
# Task template
|
||||
@shared_task(autoretry_for=(Exception,), retry_kwargs={'max_retries': 3, 'countdown': 5})
def sample_selenium_task():
    """Template Celery task that drives a Selenium session.

    Creates a webdriver via ``setup_webdriver`` (no proxy, no saved session),
    loads a placeholder URL and runs ``selenium_action_template`` against it.
    Because of ``autoretry_for=(Exception,)`` any exception raised here makes
    Celery retry the task (``max_retries`` 3, ``countdown`` 5).

    The browser session is torn down in a ``finally`` block so that a failing
    attempt does not leave a browser behind before the retry starts a new one.
    """
    driver = setup_webdriver(use_proxy=False, use_saved_session=False)
    try:
        driver.get("Place URL here")
        selenium_action_template(driver)
        # Place any other actions here after Selenium is done executing
        # TODO: Modify this as needed
    finally:
        # Once completed (or failed), always close the session. The nested
        # try/finally guarantees quit() still runs if close() itself raises.
        try:
            driver.close()
        finally:
            driver.quit()
|
||||
|
||||
# Sample task to scrape Google for search results based on a keyword
|
||||
|
||||
|
||||
@shared_task(autoretry_for=(Exception,), retry_kwargs={'max_retries': 3, 'countdown': 5})
def simple_google_search():
    """Scrape Google for a fixed search term and queue each result for storage.

    Opens a webdriver (no proxy, no saved session), loads Google, runs
    ``google_search`` for "cat blog posts", then reads the title (first
    ``<h3>``) and link (first ``<a>`` ``href``) of every child of the results
    container. One ``create_search_result`` task is queued per result.

    Because of ``autoretry_for=(Exception,)`` any exception makes Celery retry
    the task (``max_retries`` 3, ``countdown`` 5). To keep retries clean:

    * the browser session is always torn down in ``finally``;
    * results are collected first and only dispatched once scraping has
      finished, so a failure midway does not re-queue results that an
      earlier attempt already dispatched.
    """
    driver = setup_webdriver(use_proxy=False, use_saved_session=False)
    results = []
    try:
        driver.get("https://google.com/")

        google_search(driver, search_term="cat blog posts")

        # Grab every direct child of the Google results container
        search_items = get_elements(
            driver, "xpath", '//*[@id="search"]/div[1]/div[1]/*')

        for item in search_items:
            # find_elements returns an empty list instead of raising, which
            # lets us skip children that are not ordinary results (no <h3>
            # heading or no link) rather than failing the whole task.
            headings = item.find_elements(By.TAG_NAME, 'h3')
            anchors = item.find_elements(By.TAG_NAME, 'a')
            if not headings or not anchors:
                continue

            results.append(
                (headings[0].text, anchors[0].get_attribute('href')))
    finally:
        # Always close the session. The nested try/finally guarantees quit()
        # still runs if close() itself raises.
        try:
            driver.close()
        finally:
            driver.quit()

    # Dispatch only after scraping succeeded as a whole
    for title, link in results:
        create_search_result.apply_async(
            kwargs={"title": title, "link": link})
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue