From 8bc0d2dac76bf9a266ff77615077bf9ea3f43670 Mon Sep 17 00:00:00 2001
From: Tony Marsella
Date: Fri, 30 Jun 2023 15:57:40 -0700
Subject: [PATCH] error handling added to downloader

---
Review notes (free-form text between the "---" line and the diffstat is not
part of the commit message):

  * The patch wraps the whole script in try/except and adds a per-URL
    try/except inside the download loop, so a feed that fails to download
    or parse is reported and the loop moves on to the next URL.
  * Two added lines were adjusted during review and therefore differ from
    commit 8bc0d2d: session.get() is given timeout=30 (requests applies no
    timeout unless one is passed), and the result of item.find() is checked
    for None before .text is read, so an item lacking one of the listed
    fields no longer raises AttributeError and discards the rest of its
    feed. Line counts are unchanged; the post-image blob id on the "index"
    line predates these adjustments.

 xmldownloader.py | 114 +++++++++++++++++++++++++++--------------------
 1 file changed, 65 insertions(+), 49 deletions(-)

diff --git a/xmldownloader.py b/xmldownloader.py
index bd6378a..4d110f7 100644
--- a/xmldownloader.py
+++ b/xmldownloader.py
@@ -5,65 +5,81 @@ from fake_useragent import UserAgent
 import requests
 import datetime
 
-# Get the current date and time
-current_datetime = datetime.datetime.now()
+try:
+    # Get the current date and time
+    current_datetime = datetime.datetime.now()
 
-# Format the date and time as a string
-run_time = current_datetime.strftime("%Y-%m-%d %H:%M:%S")
+    # Format the date and time as a string
+    run_time = current_datetime.strftime("%Y-%m-%d %H:%M:%S")
 
-# Define the list of URLs
-urls = [
-    "https://rss.indeed.com/rss?q=(it+OR+technology+OR+developer+OR+software)+AND+(bible+OR+christian+OR+jesus)+-LDS+-%22Latter-Day+Saints%22+-catholic+-christian.&fromage=14",
-    "https://rss.indeed.com/rss?q=(marketing+OR+UI+OR+design)+AND+(bible+OR+christian+OR+jesus)+-LDS+-%22Latter-Day+Saints%22+-catholic+-christian.&fromage=14",
-]
+    # Define the list of URLs
+    urls = [
+        "https://rss.indeed.com/rss?q=(it+OR+technology+OR+developer+OR+software)+AND+(bible+OR+christian+OR+jesus)+-LDS+-%22Latter-Day+Saints%22+-catholic+-christian.&fromage=14",
+        "https://rss.indeed.com/rss?q=(marketing+OR+UI+OR+design)+AND+(bible+OR+christian+OR+jesus)+-LDS+-%22Latter-Day+Saints%22+-catholic+-christian.&fromage=14",
+    ]
 
-# Set a custom user agent
-ua = UserAgent()
-headers = {
-    'User-Agent': ua.random,
-    'Referer': 'https://google.com',
-    'Accept-Language': 'en-US,en;q=0.9',
-    'Accept-Encoding': 'gzip, deflate, br',
-    'Connection': 'keep-alive',
-    'Upgrade-Insecure-Requests': '1',
-}
+    # Set a custom user agent
+    ua = UserAgent()
+    headers = {
+        'User-Agent': ua.random,
+        'Referer': 'https://google.com',
+        'Accept-Language': 'en-US,en;q=0.9',
+        'Accept-Encoding': 'gzip, deflate, br',
+        'Connection': 'keep-alive',
+        'Upgrade-Insecure-Requests': '1',
+    }
 
-# Create a session object to handle cookies
-session = requests.Session()
-session.headers.update(headers)
+    # Create a session object to handle cookies
+    session = requests.Session()
+    session.headers.update(headers)
 
-# Create a new XML root element to store all the extracted values
-output_root = ET.Element("root")
+    # Create a new XML root element to store all the extracted values
+    output_root = ET.Element("root")
 
-for url in urls:
-    # Download the XML file
-    response = session.get(url)
-    content = response.content
+    for url in urls:
+        try:
+            # Download the XML file
+            response = session.get(url, timeout=30)  # requests never times out unless told to
+            response.raise_for_status()  # Raise an exception for non-successful status codes
 
-    # Read the downloaded XML content
-    tree = ET.fromstring(content)
+            content = response.content
 
-    # Define the desired attribute names
-    attribute_names = ["title", "link", "source", "guid", "pubDate", "description", "{http://www.georss.org/georss}point"]
+            # Read the downloaded XML content
+            tree = ET.fromstring(content)
 
-    # Iterate over the "item" elements and extract the desired values
-    for item in tree.findall(".//item"):
-        new_element = ET.SubElement(output_root, "item")
+            # Define the desired attribute names
+            attribute_names = ["title", "link", "source", "guid", "pubDate", "description", "{http://www.georss.org/georss}point"]
 
-        # Extract the desired attributes from the "item" element
-        for attribute_name in attribute_names:
-            value = item.find(attribute_name).text
-            if value is not None:
-                new_element.set(attribute_name, value)
+            # Iterate over the "item" elements and extract the desired values
+            for item in tree.findall(".//item"):
+                new_element = ET.SubElement(output_root, "item")
 
-# Add a new element for the run time
-run_time_element = ET.SubElement(output_root, "run_time")
-run_time_element.text = run_time
+                # Extract the desired attributes from the "item" element
+                for attribute_name in attribute_names:
+                    child = item.find(attribute_name)
+                    if child is not None and child.text is not None:
+                        new_element.set(attribute_name, child.text)
+        except requests.exceptions.RequestException as e:
+            # Handle request-related errors
+            print("An error occurred while downloading the XML:", str(e))
+        except (ET.ParseError, AttributeError) as e:
+            # Handle XML parsing errors
+            print("An error occurred while parsing the XML:", str(e))
 
-# Create an ElementTree object with the output root
-output_tree = ET.ElementTree(output_root)
+    # Add a new element for the run time
+    run_time_element = ET.SubElement(output_root, "run_time")
+    run_time_element.text = run_time
 
-# Write the output to the XML file, overwriting the existing data
-output_filename = "./indeed_output.xml"
-with open(output_filename, 'wb') as f:
-    output_tree.write(f, encoding="utf-8", xml_declaration=True)
+    # Create an ElementTree object with the output root
+    output_tree = ET.ElementTree(output_root)
+
+    # Write the output to the XML file, overwriting the existing data
+    output_filename = "./indeed_output.xml"
+    with open(output_filename, 'wb') as f:
+        output_tree.write(f, encoding="utf-8", xml_declaration=True)
+
+    print("Script execution completed successfully.")
+
+except Exception as e:
+    # Handle any other uncaught exceptions
+    print("An unexpected error occurred:", str(e))