Description

This Python script automates retrieving work schedules from ShiftAgent, converting them into iCalendar format, and emailing the result. It uses web scraping to access the schedule, parses the data, generates an iCal file, and sends it via email.

Technical Breakdown

1. Web Scraping with Playwright

  • Utilizes the Playwright library for browser automation
  • Navigates to the ShiftAgent login page and authenticates
  • Extracts the HTML content of the schedule page

2. HTML Parsing with BeautifulSoup

  • Employs BeautifulSoup to parse the extracted HTML
  • Identifies and extracts relevant schedule information (dates, times, positions)

3. Date and Time Handling

  • Uses the datetime module to parse and manipulate date and time information
  • Handles edge cases like shifts spanning midnight

4. iCalendar File Generation

  • Leverages the icalendar library to create a standard iCalendar file
  • Converts parsed schedule data into iCalendar events

5. Email Automation

  • Sends the message over SMTP using Python’s smtplib module
  • Creates a multipart email message with email.mime modules
  • Attaches the generated iCalendar file to the email

6. Error Handling and Logging

  • Incorporates try-except blocks for robust error handling
  • Provides informative console output for debugging and monitoring

7. Main Execution Flow

  • Orchestrates the entire process: scraping, parsing, iCalendar generation, and email sending
  • Modular design allows for easy maintenance and future enhancements

This project showcases the power of Python in automating real-world tasks, combining various libraries to create a practical solution for managing work schedules efficiently.

import re
import os
from datetime import datetime
from bs4 import BeautifulSoup
from icalendar import Calendar, Event
import time
from playwright.sync_api import sync_playwright
import smtplib
from email.mime.text import MIMEText
from email.mime.application import MIMEApplication
from email.mime.multipart import MIMEMultipart

def send_icalendar_email(ics_filename):
    """Email the iCalendar file at ``ics_filename`` as an attachment.

    The message is submitted to ``smtp.gmail.com`` on port 587 after
    upgrading the connection with STARTTLS and logging in as the sender.

    Args:
        ics_filename: Path of the ``.ics`` file to attach. Only its base
            name is used as the attachment file name.

    Returns:
        None. Any failure (unreadable file, connection, login or send
        error) is reported on stdout and swallowed, so the caller keeps
        running.
    """
    try:
        # Email credentials and setup
        # NOTE(review): these are hard-coded placeholders; real credentials
        # should not live in the source file.
        sender_email = "boss@gmail.com"
        sender_password = "password"
        recipient_email = "employee@gmail.com"

        # Create the email
        msg = MIMEMultipart()
        msg['From'] = sender_email
        msg['To'] = recipient_email
        msg['Subject'] = "Work Schedule iCalendar File"

        # Email body
        body = "Please find attached your work schedule in iCalendar format."
        msg.attach(MIMEText(body, 'plain'))

        # Attach the iCalendar file
        attachment_name = os.path.basename(ics_filename)
        with open(ics_filename, "rb") as file:
            part = MIMEApplication(file.read(), Name=attachment_name)
        part['Content-Disposition'] = f'attachment; filename="{attachment_name}"'
        msg.attach(part)

        # Send email. The context manager closes the connection even when
        # starttls/login/send_message raises, so the socket is never leaked.
        with smtplib.SMTP('smtp.gmail.com', 587) as server:
            server.starttls()
            server.login(sender_email, sender_password)
            server.send_message(msg)

        print(f"iCalendar file sent to {recipient_email}")

    except Exception as e:
        # Deliberate best-effort: a mail failure must not abort the caller.
        print(f"Failed to send email: {str(e)}")

def scrape_shiftagent():
    """Log in to ShiftAgent with headless Chromium and process the schedule.

    Opens the login page, fills in the credentials, submits the form with
    Enter, then loads the personal work-periods page and hands its HTML to
    ``main``. If the browser is still on the login page after submitting,
    a screenshot is saved to ``login_failed.png`` and the function returns.

    Returns:
        None. Any exception raised along the way is printed and swallowed;
        the browser context and browser are always closed.
    """
    login_url = "https://shiftagent.org/sa/#/login"
    schedule_url = "https://shiftagent.org/sa/#/workPeriods/me"
    login_form = '#fullscreen-overlay .login'
    email_input = '#fullscreen-overlay .login input[type="email"]'
    password_input = '#fullscreen-overlay .login input[type="password"]'
    schedule_items = '.upcoming_work_periods_container .ng-scope'

    with sync_playwright() as playwright:
        chromium = playwright.chromium.launch(headless=True)
        session = chromium.new_context()
        tab = session.new_page()

        try:
            print("Navigating to the login page...")
            tab.goto(login_url, wait_until="networkidle", timeout=60000)

            print("Waiting for login form to be rendered...")
            tab.wait_for_selector(login_form, state='visible', timeout=30000)

            print("Filling in login details...")
            tab.fill(email_input, 'example@gmail.com')
            tab.fill(password_input, 'password')

            print("Waiting for a moment...")
            time.sleep(1)

            # The form is submitted by pressing Enter in the password field
            # rather than by clicking a button.
            print("Attempting to click submit button...")
            tab.press(password_input, 'Enter')

            print("Waiting for navigation after login...")
            time.sleep(3)
            tab.wait_for_url(schedule_url, timeout=20000)

            still_on_login = "/sa/#/login" in tab.url
            if still_on_login:
                print("Login failed. Still on login page.")
                tab.screenshot(path="login_failed.png")
                return

            print("Successfully logged in. Navigating to pickups page...")
            tab.goto(schedule_url, wait_until="networkidle", timeout=30000)

            print("Waiting for schedule elements to load...")
            tab.wait_for_selector(schedule_items, timeout=60000)

            print("Extracting schedule data from the page...")
            page_html = tab.content()

            main(page_html)

        except Exception as error:
            print(f"An error occurred: {str(error)}")

        finally:
            # Runs on success, on error and on the early return above.
            session.close()
            chromium.close()

def parse_html_schedule(html_content):
    """Extract the upcoming shifts from the schedule page HTML.

    Each element matching ``.upcoming_work_periods_container .ng-scope`` is
    expected to contain a date (``.sched_profile_img_date``, e.g. "Jan 5"),
    a time range (``.sched_times``, e.g. "9:00a to 5:00p") and a position
    (``.sched_pos``). Items that lack one of these or cannot be parsed are
    reported on stdout and skipped.

    Args:
        html_content: HTML source of the schedule page.

    Returns:
        A list of dicts with the keys ``'position'`` (str), ``'start_time'``
        and ``'end_time'`` (naive ``datetime`` objects). When the end time
        is earlier than the start time the shift is treated as running past
        midnight and the end is moved to the following day.
    """
    # Imported here because the module only imports `datetime` from datetime.
    from datetime import timedelta

    soup = BeautifulSoup(html_content, 'html.parser')
    schedule = []

    schedule_container = soup.select('.upcoming_work_periods_container .ng-scope')
    print(f"Found {len(schedule_container)} schedule items.")

    # The page shows no year, so the current one is used for every shift.
    # NOTE(review): a January shift scraped in December would therefore land
    # in the wrong year — confirm whether that case needs handling.
    year = datetime.now().year

    for item in schedule_container:
        date_element = item.select_one('.sched_profile_img_date')
        time_element = item.select_one('.sched_times')
        position_element = item.select_one('.sched_pos')

        if not (date_element and time_element and position_element):
            print("Warning: Could not find one or more required elements for this item.")
            continue

        date = date_element.text.strip()
        time_range = time_element.text.strip()
        position = position_element.text.strip()

        # Normalise "9:00a" / "9:00 p" to "9:00am" / "9:00pm" for %p. The
        # optional trailing "m" keeps an already complete "am"/"pm" from
        # turning into "amm"/"pmm".
        time_range = re.sub(r'(\d{1,2}:\d{2})\s?([ap])m?', r'\1\2m', time_range)

        month_day = date.split()
        if len(month_day) != 2:
            print(f"Unexpected date format: {date}")
            continue
        month, day = month_day

        times = time_range.split('to')
        if len(times) != 2:
            print(f"Unexpected time format: {time_range}")
            continue
        start_time, end_time = times

        try:
            start_datetime = datetime.strptime(
                f"{year} {month} {day} {start_time.strip()}", "%Y %b %d %I:%M%p")
            end_datetime = datetime.strptime(
                f"{year} {month} {day} {end_time.strip()}", "%Y %b %d %I:%M%p")
        except ValueError as ve:
            print(f"Error parsing datetime: {ve}")
            continue

        if end_datetime < start_datetime:
            # Shift runs past midnight. timedelta rolls over month and year
            # boundaries, whereas replace(day=day + 1) raises ValueError on
            # the last day of a month.
            end_datetime += timedelta(days=1)

        schedule.append({
            'position': position,
            'start_time': start_datetime,
            'end_time': end_datetime
        })

    return schedule

def create_work_schedule_ical(schedule):
    """Build an iCalendar object containing one event per shift.

    Args:
        schedule: Iterable of dicts with the keys ``'position'``,
            ``'start_time'`` and ``'end_time'``; the two time values are
            passed to the calendar as DTSTART and DTEND.

    Returns:
        An ``icalendar.Calendar`` with a VEVENT for every shift.
    """
    # Imported here because neither name is imported at module level.
    import uuid
    from datetime import timezone

    cal = Calendar()
    cal.add('prodid', '-//Your Company//Work Schedule//EN')
    cal.add('version', '2.0')

    # RFC 5545 requires every VEVENT to carry DTSTAMP and UID; the events
    # previously had neither.
    generated_at = datetime.now(timezone.utc)

    for shift in schedule:
        position = shift['position']
        start = shift['start_time']
        end = shift['end_time']

        # Derive the UID from the shift itself so the same shift gets the
        # same UID every time the file is regenerated.
        uid_source = f"shiftagent:{position}:{start.isoformat()}:{end.isoformat()}"

        event = Event()
        event.add('uid', str(uuid.uuid5(uuid.NAMESPACE_URL, uid_source)))
        event.add('dtstamp', generated_at)
        event.add('summary', f"Work - {position}")
        event.add('dtstart', start)
        event.add('dtend', end)
        event.add('description', f"Working as {position}")
        cal.add_component(event)

    return cal

def main(html_content):
    """Turn schedule page HTML into an ``.ics`` file and email it.

    Parses the shifts out of ``html_content``; when at least one is found,
    writes them to ``work_schedule.ics`` in the current working directory
    and passes that file to ``send_icalendar_email``. With no shifts, a
    message is printed and nothing is written or sent.

    Args:
        html_content: HTML source of the schedule page.
    """
    shifts = parse_html_schedule(html_content)

    if shifts:
        calendar = create_work_schedule_ical(shifts)

        ics_filename = 'work_schedule.ics'
        with open(ics_filename, 'wb') as ics_file:
            ics_file.write(calendar.to_ical())

        print(f"iCalendar file '{ics_filename}' has been generated.")

        send_icalendar_email(ics_filename)
    else:
        print("No schedule data found.")

# Script entry point: start the scrape only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    scrape_shiftagent()

0 Comments

Leave a Reply

Avatar placeholder

Your email address will not be published. Required fields are marked *