From 8bc0d2dac76bf9a266ff77615077bf9ea3f43670 Mon Sep 17 00:00:00 2001
From: Tony Marsella <tony@shilohsoftware.com>
Date: Fri, 30 Jun 2023 15:57:40 -0700
Subject: [PATCH] error handling added to downloader

---
 xmldownloader.py | 114 +++++++++++++++++++++++++++--------------------
 1 file changed, 65 insertions(+), 49 deletions(-)

diff --git a/xmldownloader.py b/xmldownloader.py
index bd6378a..4d110f7 100644
--- a/xmldownloader.py
+++ b/xmldownloader.py
@@ -5,65 +5,81 @@ from fake_useragent import UserAgent
 import requests
 import datetime
 
-# Get the current date and time
-current_datetime = datetime.datetime.now()
+try:
+    # Get the current date and time
+    current_datetime = datetime.datetime.now()
 
-# Format the date and time as a string
-run_time = current_datetime.strftime("%Y-%m-%d %H:%M:%S")
+    # Format the date and time as a string
+    run_time = current_datetime.strftime("%Y-%m-%d %H:%M:%S")
 
-# Define the list of URLs
-urls = [
-    "https://rss.indeed.com/rss?q=(it+OR+technology+OR+developer+OR+software)+AND+(bible+OR+christian+OR+jesus)+-LDS+-%22Latter-Day+Saints%22+-catholic+-christian.&fromage=14",
-    "https://rss.indeed.com/rss?q=(marketing+OR+UI+OR+design)+AND+(bible+OR+christian+OR+jesus)+-LDS+-%22Latter-Day+Saints%22+-catholic+-christian.&fromage=14",
-]
+    # Define the list of URLs
+    urls = [
+        "https://rss.indeed.com/rss?q=(it+OR+technology+OR+developer+OR+software)+AND+(bible+OR+christian+OR+jesus)+-LDS+-%22Latter-Day+Saints%22+-catholic+-christian.&fromage=14",
+        "https://rss.indeed.com/rss?q=(marketing+OR+UI+OR+design)+AND+(bible+OR+christian+OR+jesus)+-LDS+-%22Latter-Day+Saints%22+-catholic+-christian.&fromage=14",
+    ]
 
-# Set a custom user agent
-ua = UserAgent()
-headers = {
-    'User-Agent': ua.random,
-    'Referer': 'https://google.com',
-    'Accept-Language': 'en-US,en;q=0.9',
-    'Accept-Encoding': 'gzip, deflate, br',
-    'Connection': 'keep-alive',
-    'Upgrade-Insecure-Requests': '1',
-}
+    # Set a custom user agent
+    ua = UserAgent()
+    headers = {
+        'User-Agent': ua.random,
+        'Referer': 'https://google.com',
+        'Accept-Language': 'en-US,en;q=0.9',
+        'Accept-Encoding': 'gzip, deflate, br',
+        'Connection': 'keep-alive',
+        'Upgrade-Insecure-Requests': '1',
+    }
 
-# Create a session object to handle cookies
-session = requests.Session()
-session.headers.update(headers)
+    # Create a session object to handle cookies
+    session = requests.Session()
+    session.headers.update(headers)
 
-# Create a new XML root element to store all the extracted values
-output_root = ET.Element("root")
+    # Create a new XML root element to store all the extracted values
+    output_root = ET.Element("root")
 
-for url in urls:
-    # Download the XML file
-    response = session.get(url)
-    content = response.content
+    for url in urls:
+        try:
+            # Download the XML file; the timeout stops a stalled server from hanging the run
+            response = session.get(url, timeout=30)
+            response.raise_for_status()  # Raise an exception for non-successful status codes
 
-    # Read the downloaded XML content
-    tree = ET.fromstring(content)
+            content = response.content
 
-    # Define the desired attribute names
-    attribute_names = ["title", "link", "source", "guid", "pubDate", "description", "{http://www.georss.org/georss}point"]
+            # Read the downloaded XML content
+            tree = ET.fromstring(content)
 
-    # Iterate over the "item" elements and extract the desired values
-    for item in tree.findall(".//item"):
-        new_element = ET.SubElement(output_root, "item")
+            # Define the desired attribute names
+            attribute_names = ["title", "link", "source", "guid", "pubDate", "description", "{http://www.georss.org/georss}point"]
 
-        # Extract the desired attributes from the "item" element
-        for attribute_name in attribute_names:
-            value = item.find(attribute_name).text
-            if value is not None:
-                new_element.set(attribute_name, value)
+            # Iterate over the "item" elements and extract the desired values
+            for item in tree.findall(".//item"):
+                new_element = ET.SubElement(output_root, "item")
 
-# Add a new element for the run time
-run_time_element = ET.SubElement(output_root, "run_time")
-run_time_element.text = run_time
+                # Copy each desired child's text onto the new element; absent or empty children are skipped
+                for attribute_name in attribute_names:
+                    value = item.findtext(attribute_name)
+                    if value:
+                        new_element.set(attribute_name, value)
+        except requests.exceptions.RequestException as e:
+            # Handle request-related errors
+            print("An error occurred while downloading the XML:", str(e))
+        except (ET.ParseError, AttributeError) as e:
+            # Handle XML parsing errors
+            print("An error occurred while parsing the XML:", str(e))
 
-# Create an ElementTree object with the output root
-output_tree = ET.ElementTree(output_root)
+    # Add a new element for the run time
+    run_time_element = ET.SubElement(output_root, "run_time")
+    run_time_element.text = run_time
 
-# Write the output to the XML file, overwriting the existing data
-output_filename = "./indeed_output.xml"
-with open(output_filename, 'wb') as f:
-    output_tree.write(f, encoding="utf-8", xml_declaration=True)
+    # Create an ElementTree object with the output root
+    output_tree = ET.ElementTree(output_root)
+
+    # Write the output to the XML file, overwriting the existing data
+    output_filename = "./indeed_output.xml"
+    with open(output_filename, 'wb') as f:
+        output_tree.write(f, encoding="utf-8", xml_declaration=True)
+
+    print("Script execution completed successfully.")
+
+except Exception as e:
+    # Handle any other uncaught exceptions
+    print("An unexpected error occurred:", str(e))