error handling added to downloader

2023-06-30 15:57:40 -07:00 · 2023-06-30 15:57:40 -07:00 · 8bc0d2dac7
commit 8bc0d2dac7
parent 3c33a88fe0
1 changed file with 65 additions and 49 deletions
--- a/xmldownloader.py
+++ b/xmldownloader.py
@@ -5,65 +5,81 @@ from fake_useragent import UserAgent
 import requests
 import datetime

-# Get the current date and time
-current_datetime = datetime.datetime.now()
+try:
+    # Get the current date and time
+    current_datetime = datetime.datetime.now()

-# Format the date and time as a string
-run_time = current_datetime.strftime("%Y-%m-%d %H:%M:%S")
+    # Format the date and time as a string
+    run_time = current_datetime.strftime("%Y-%m-%d %H:%M:%S")

-# Define the list of URLs
-urls = [
-    "https://rss.indeed.com/rss?q=(it+OR+technology+OR+developer+OR+software)+AND+(bible+OR+christian+OR+jesus)+-LDS+-%22Latter-Day+Saints%22+-catholic+-christian.&fromage=14",
-    "https://rss.indeed.com/rss?q=(marketing+OR+UI+OR+design)+AND+(bible+OR+christian+OR+jesus)+-LDS+-%22Latter-Day+Saints%22+-catholic+-christian.&fromage=14",
-]
+    # Define the list of URLs
+    urls = [
+        "https://rss.indeed.com/rss?q=(it+OR+technology+OR+developer+OR+software)+AND+(bible+OR+christian+OR+jesus)+-LDS+-%22Latter-Day+Saints%22+-catholic+-christian.&fromage=14",
+        "https://rss.indeed.com/rss?q=(marketing+OR+UI+OR+design)+AND+(bible+OR+christian+OR+jesus)+-LDS+-%22Latter-Day+Saints%22+-catholic+-christian.&fromage=14",
+    ]

-# Set a custom user agent
-ua = UserAgent()
-headers = {
-    'User-Agent': ua.random,
-    'Referer': 'https://google.com',
-    'Accept-Language': 'en-US,en;q=0.9',
-    'Accept-Encoding': 'gzip, deflate, br',
-    'Connection': 'keep-alive',
-    'Upgrade-Insecure-Requests': '1',
-}
+    # Set a custom user agent
+    ua = UserAgent()
+    headers = {
+        'User-Agent': ua.random,
+        'Referer': 'https://google.com',
+        'Accept-Language': 'en-US,en;q=0.9',
+        'Accept-Encoding': 'gzip, deflate, br',
+        'Connection': 'keep-alive',
+        'Upgrade-Insecure-Requests': '1',
+    }

-# Create a session object to handle cookies
-session = requests.Session()
-session.headers.update(headers)
+    # Create a session object to handle cookies
+    session = requests.Session()
+    session.headers.update(headers)

-# Create a new XML root element to store all the extracted values
-output_root = ET.Element("root")
+    # Create a new XML root element to store all the extracted values
+    output_root = ET.Element("root")

-for url in urls:
-    # Download the XML file
-    response = session.get(url)
-    content = response.content
+    for url in urls:
+        try:
+            # Download the XML file
+            response = session.get(url)
+            response.raise_for_status()  # Raise an exception for non-successful status codes

-    # Read the downloaded XML content
-    tree = ET.fromstring(content)
+            content = response.content

-    # Define the desired attribute names
-    attribute_names = ["title", "link", "source", "guid", "pubDate", "description", "{http://www.georss.org/georss}point"]
+            # Read the downloaded XML content
+            tree = ET.fromstring(content)

-    # Iterate over the "item" elements and extract the desired values
-    for item in tree.findall(".//item"):
-        new_element = ET.SubElement(output_root, "item")
+            # Define the desired attribute names
+            attribute_names = ["title", "link", "source", "guid", "pubDate", "description", "{http://www.georss.org/georss}point"]

-        # Extract the desired attributes from the "item" element
-        for attribute_name in attribute_names:
-            value = item.find(attribute_name).text
-            if value is not None:
-                new_element.set(attribute_name, value)
+            # Iterate over the "item" elements and extract the desired values
+            for item in tree.findall(".//item"):
+                new_element = ET.SubElement(output_root, "item")

-# Add a new element for the run time
-run_time_element = ET.SubElement(output_root, "run_time")
-run_time_element.text = run_time
+                # Extract the desired attributes from the "item" element
+                for attribute_name in attribute_names:
+                    value = item.find(attribute_name).text
+                    if value is not None:
+                        new_element.set(attribute_name, value)
+        except requests.exceptions.RequestException as e:
+            # Handle request-related errors
+            print("An error occurred while downloading the XML:", str(e))
+        except (ET.ParseError, AttributeError) as e:
+            # Handle XML parsing errors
+            print("An error occurred while parsing the XML:", str(e))

-# Create an ElementTree object with the output root
-output_tree = ET.ElementTree(output_root)
+    # Add a new element for the run time
+    run_time_element = ET.SubElement(output_root, "run_time")
+    run_time_element.text = run_time

-# Write the output to the XML file, overwriting the existing data
-output_filename = "./indeed_output.xml"
-with open(output_filename, 'wb') as f:
-    output_tree.write(f, encoding="utf-8", xml_declaration=True)
+    # Create an ElementTree object with the output root
+    output_tree = ET.ElementTree(output_root)
+
+    # Write the output to the XML file, overwriting the existing data
+    output_filename = "./indeed_output.xml"
+    with open(output_filename, 'wb') as f:
+        output_tree.write(f, encoding="utf-8", xml_declaration=True)
+
+    print("Script execution completed successfully.")
+
+except Exception as e:
+    # Handle any other uncaught exceptions
+    print("An unexpected error occurred:", str(e))