Description
This Python script automates retrieving work schedules from ShiftAgent, converting them into iCalendar format, and emailing the result. It uses web scraping to access the schedule, parses the data, generates an iCal file, and sends it via email.
Technical Breakdown
1. Web Scraping with Playwright
- Utilizes the Playwright library for browser automation
- Navigates to the ShiftAgent login page and authenticates
- Extracts the HTML content of the schedule page
2. HTML Parsing with BeautifulSoup
- Employs BeautifulSoup to parse the extracted HTML
- Identifies and extracts relevant schedule information (dates, times, positions)
3. Date and Time Handling
- Uses the `datetime` module to parse and manipulate date and time information
- Handles edge cases like shifts spanning midnight
4. iCalendar File Generation
- Leverages the `icalendar` library to create a standard iCalendar file
- Converts parsed schedule data into iCalendar events
5. Email Automation
- Sends mail over SMTP using Python's `smtplib` module
- Creates a multipart email message with the `email.mime` modules
- Attaches the generated iCalendar file to the email
6. Error Handling and Logging
- Incorporates try-except blocks for robust error handling
- Provides informative console output for debugging and monitoring
7. Main Execution Flow
- Orchestrates the entire process: scraping, parsing, iCalendar generation, and email sending
- Modular design allows for easy maintenance and future enhancements
This project showcases the power of Python in automating real-world tasks, combining various libraries to create a practical solution for managing work schedules efficiently.
import os
import re
import smtplib
import time
import uuid
from datetime import datetime, timedelta, timezone
from email.mime.application import MIMEApplication
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

from bs4 import BeautifulSoup
from icalendar import Calendar, Event
from playwright.sync_api import TimeoutError as PlaywrightTimeoutError
from playwright.sync_api import sync_playwright
def send_icalendar_email(ics_filename):
    """Email the iCalendar file at ``ics_filename`` as an attachment.

    Builds a multipart message (plain-text body plus the file as an
    attachment) and sends it through ``smtp.gmail.com`` on port 587 after
    upgrading the connection with STARTTLS.

    Failures (unreadable file, connection or login errors, ...) are reported
    on stdout instead of being raised, so the function always returns
    ``None``.

    Args:
        ics_filename: Path of the ``.ics`` file to attach.
    """
    # NOTE(review): these are hard-coded placeholder credentials/addresses.
    sender_email = "boss@gmail.com"
    sender_password = "password"
    recipient_email = "employee@gmail.com"

    try:
        # Create the email
        msg = MIMEMultipart()
        msg['From'] = sender_email
        msg['To'] = recipient_email
        msg['Subject'] = "Work Schedule iCalendar File"

        # Email body
        body = "Please find attached your work schedule in iCalendar format."
        msg.attach(MIMEText(body, 'plain'))

        # Attach the iCalendar file
        attachment_name = os.path.basename(ics_filename)
        with open(ics_filename, "rb") as file:
            part = MIMEApplication(file.read(), Name=attachment_name)
        part['Content-Disposition'] = f'attachment; filename="{attachment_name}"'
        msg.attach(part)

        # The context manager issues QUIT and closes the socket even when
        # starttls/login/send_message raises, so the connection never leaks.
        with smtplib.SMTP('smtp.gmail.com', 587) as server:
            server.starttls()
            server.login(sender_email, sender_password)
            server.send_message(msg)

        print(f"iCalendar file sent to {recipient_email}")
    except Exception as e:
        # Best-effort delivery: report the problem and carry on.
        print(f"Failed to send email: {str(e)}")
def scrape_shiftagent():
    """Log in to ShiftAgent, grab the schedule page HTML and pass it to ``main``.

    Launches headless Chromium through Playwright, submits the login form,
    waits for the redirect to the personal work-periods page, reloads that
    page, waits for the schedule elements and hands ``page.content()`` to
    ``main``.

    If the login redirect does not happen and the browser is still on the
    login page, a screenshot is saved to ``login_failed.png`` and the function
    returns. Any other error raised while driving the page is printed rather
    than propagated. The browser context and browser are always closed.
    """
    login_form = '#fullscreen-overlay .login'
    email_field = '#fullscreen-overlay .login input[type="email"]'
    password_field = '#fullscreen-overlay .login input[type="password"]'
    schedule_url = "https://shiftagent.org/sa/#/workPeriods/me"

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        context = browser.new_context()
        page = context.new_page()
        try:
            print("Navigating to the login page...")
            page.goto("https://shiftagent.org/sa/#/login", wait_until="networkidle", timeout=60000)

            print("Waiting for login form to be rendered...")
            page.wait_for_selector(login_form, state='visible', timeout=30000)

            print("Filling in login details...")
            # NOTE(review): placeholder credentials are hard-coded here.
            page.fill(email_field, 'example@gmail.com')
            page.fill(password_field, 'password')

            print("Waiting for a moment...")
            # wait_for_timeout keeps Playwright's event loop running, which
            # time.sleep() would block in the sync API.
            page.wait_for_timeout(1000)

            print("Attempting to click submit button...")
            page.press(password_field, 'Enter')

            print("Waiting for navigation after login...")
            page.wait_for_timeout(3000)
            try:
                page.wait_for_url(schedule_url, timeout=20000)
            except PlaywrightTimeoutError:
                # wait_for_url raises on timeout, so the "still on the login
                # page" diagnosis has to happen here to be reachable at all.
                if "/sa/#/login" not in page.url:
                    raise
                print("Login failed. Still on login page.")
                page.screenshot(path="login_failed.png")
                return

            print("Successfully logged in. Navigating to pickups page...")
            page.goto(schedule_url, wait_until="networkidle", timeout=30000)

            print("Waiting for schedule elements to load...")
            page.wait_for_selector('.upcoming_work_periods_container .ng-scope', timeout=60000)

            print("Extracting schedule data from the page...")
            html_content = page.content()
            main(html_content)
        except Exception as e:
            print(f"An error occurred: {str(e)}")
        finally:
            context.close()
            browser.close()
def parse_html_schedule(html_content):
    """Extract shifts from the schedule page HTML.

    Every element matching ``.upcoming_work_periods_container .ng-scope`` is
    inspected for a date (``.sched_profile_img_date``), a time range
    (``.sched_times``) and a position (``.sched_pos``). Items missing one of
    those elements, or whose date/time text cannot be parsed, are reported on
    stdout and skipped.

    Args:
        html_content: HTML source of the schedule page.

    Returns:
        A list of dicts with the keys ``'position'`` (str), ``'start_time'``
        and ``'end_time'`` (naive ``datetime`` objects). When the end time is
        earlier than the start time the shift is treated as running past
        midnight and the end is moved to the following day.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    schedule = []
    schedule_container = soup.select('.upcoming_work_periods_container .ng-scope')
    print(f"Found {len(schedule_container)} schedule items.")

    # The page text carries no year, so the current one is assumed.
    # NOTE(review): a January shift scraped in December would therefore get
    # the wrong year - confirm whether that case needs handling.
    year = datetime.now().year

    for item in schedule_container:
        date_element = item.select_one('.sched_profile_img_date')
        time_element = item.select_one('.sched_times')
        position_element = item.select_one('.sched_pos')
        if not (date_element and time_element and position_element):
            print("Warning: Could not find one or more required elements for this item.")
            continue

        date = date_element.text.strip()
        time_range = time_element.text.strip()
        position = position_element.text.strip()

        # Normalise an "a"/"p" suffix to "am"/"pm" for %p. The optional
        # trailing "m" keeps text that already says "am"/"pm" from turning
        # into "amm"/"pmm".
        time_range = re.sub(r'(\d{1,2}:\d{2})\s?([ap])m?', r'\1\2m', time_range)

        month_day = date.split()
        if len(month_day) != 2:
            print(f"Unexpected date format: {date}")
            continue
        month, day = month_day

        times = time_range.split('to')
        if len(times) != 2:
            print(f"Unexpected time format: {time_range}")
            continue
        start_time, end_time = times

        try:
            start_datetime = datetime.strptime(
                f"{year} {month} {day} {start_time.strip()}", "%Y %b %d %I:%M%p"
            )
            end_datetime = datetime.strptime(
                f"{year} {month} {day} {end_time.strip()}", "%Y %b %d %I:%M%p"
            )
        except ValueError as ve:
            print(f"Error parsing datetime: {ve}")
            continue

        if end_datetime < start_datetime:
            # Shift crosses midnight. timedelta arithmetic rolls over month
            # and year boundaries; replace(day=day + 1) would raise
            # ValueError on the last day of a month.
            end_datetime += timedelta(days=1)

        schedule.append({
            'position': position,
            'start_time': start_datetime,
            'end_time': end_datetime
        })

    return schedule
def create_work_schedule_ical(schedule):
    """Build an iCalendar ``Calendar`` containing one event per shift.

    Args:
        schedule: Iterable of dicts with the keys ``'position'``,
            ``'start_time'`` and ``'end_time'``; the two time values are
            passed to the event as DTSTART and DTEND.

    Returns:
        The populated ``Calendar`` object.
    """
    cal = Calendar()
    cal.add('prodid', '-//Your Company//Work Schedule//EN')
    cal.add('version', '2.0')

    # RFC 5545 requires every VEVENT to carry DTSTAMP (in UTC) and UID.
    generated_at = datetime.now(timezone.utc)

    for shift in schedule:
        position = shift['position']
        start = shift['start_time']

        event = Event()
        # Derive the UID from the shift itself so the same shift gets the
        # same identifier every time the calendar is regenerated.
        event.add('uid', str(uuid.uuid5(
            uuid.NAMESPACE_URL,
            f"work-schedule/{start.isoformat()}/{position}",
        )))
        event.add('dtstamp', generated_at)
        event.add('summary', f"Work - {position}")
        event.add('dtstart', start)
        event.add('dtend', shift['end_time'])
        event.add('description', f"Working as {position}")
        cal.add_component(event)

    return cal
def main(html_content):
    """Convert schedule HTML into an ``.ics`` file and email it.

    Parses ``html_content`` into shifts, writes the resulting calendar to
    ``work_schedule.ics`` in the current directory and sends that file by
    email. When no shifts are parsed, a message is printed and nothing is
    written or sent.

    Args:
        html_content: HTML source of the schedule page.
    """
    shifts = parse_html_schedule(html_content)
    if shifts:
        calendar = create_work_schedule_ical(shifts)
        ics_filename = 'work_schedule.ics'
        with open(ics_filename, 'wb') as ics_file:
            ics_file.write(calendar.to_ical())
        print(f"iCalendar file '{ics_filename}' has been generated.")
        send_icalendar_email(ics_filename)
    else:
        print("No schedule data found.")
# Start the scrape only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    scrape_shiftagent()
0 Comments