"""Collect job listings from Indeed RSS feeds into one XML file.

Each feed in ``FEED_URLS`` is downloaded, every ``<item>`` found in it is
flattened into an ``<item>`` element whose XML attributes carry the selected
child values, and the result is written to ``OUTPUT_FILENAME`` together with
a ``<run_time>`` element holding the local start time of the run.
"""
import datetime
import random
import time
import xml.etree.ElementTree as ET

from fake_useragent import UserAgent
import requests

# RSS search feeds to harvest.
FEED_URLS = [
    "https://rss.indeed.com/rss?q=(it+OR+technology+OR+developer+OR+software)+AND+(bible+OR+christian+OR+jesus)+-LDS+-%22Latter-Day+Saints%22+-catholic+-christian.&fromage=14",
    "https://rss.indeed.com/rss?q=(marketing+OR+UI+OR+design)+AND+(bible+OR+christian+OR+jesus)+-LDS+-%22Latter-Day+Saints%22+-catholic+-christian.&fromage=14",
]

# Child elements of each RSS <item> that are copied to the output.
ITEM_FIELDS = (
    "title",
    "link",
    "source",
    "guid",
    "pubDate",
    "description",
    "{http://www.georss.org/georss}point",
)

# Upper bound (seconds) for connecting to / reading from a feed, so that a
# stalled server cannot hang the script forever.
REQUEST_TIMEOUT_SECONDS = 30

OUTPUT_FILENAME = "./indeed_output.xml"


def _copy_item_fields(item, output_root):
    """Append an ``<item>`` to *output_root* mirroring the fields of *item*.

    Each name in ``ITEM_FIELDS`` that exists as a direct child of *item* and
    has text is stored as an attribute of the new element. Missing or empty
    children are skipped, so one absent optional field no longer discards
    the remainder of the feed.
    """
    new_element = ET.SubElement(output_root, "item")
    for field_name in ITEM_FIELDS:
        child = item.find(field_name)
        if child is None or child.text is None:
            continue
        new_element.set(field_name, child.text)


def _collect_feed(session, url, output_root):
    """Download one feed and add all of its items to *output_root*.

    Download and parse failures are reported on stdout and the feed is
    skipped; they do not stop the processing of the other feeds.
    """
    try:
        response = session.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()  # Treat non-2xx answers as download errors
        tree = ET.fromstring(response.content)
    except requests.exceptions.RequestException as e:
        # Covers connection problems, timeouts and HTTP error statuses
        print("An error occurred while downloading the XML:", str(e))
        return
    except ET.ParseError as e:
        print("An error occurred while parsing the XML:", str(e))
        return

    for item in tree.findall(".//item"):
        _copy_item_fields(item, output_root)


# The script body intentionally runs at module top level, as before.
try:
    # Timestamp of this run (naive local time), recorded in the output file
    run_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # Browser-like request headers with a randomly chosen user agent.
    # NOTE(review): 'br' is advertised in Accept-Encoding; confirm that a
    # brotli decoder is installed, otherwise a brotli-compressed response
    # may not be decoded before parsing.
    ua = UserAgent()
    headers = {
        'User-Agent': ua.random,
        'Referer': 'https://google.com',
        'Accept-Language': 'en-US,en;q=0.9',
        'Accept-Encoding': 'gzip, deflate, br',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
    }

    # Root element that accumulates the items of every feed
    output_root = ET.Element("root")

    # One session for all feeds so cookies and headers are shared; the
    # context manager releases its connections when the downloads are done.
    with requests.Session() as session:
        session.headers.update(headers)
        for url in FEED_URLS:
            _collect_feed(session, url, output_root)

    run_time_element = ET.SubElement(output_root, "run_time")
    run_time_element.text = run_time

    # Write the output to the XML file, overwriting the existing data
    output_tree = ET.ElementTree(output_root)
    with open(OUTPUT_FILENAME, 'wb') as f:
        output_tree.write(f, encoding="utf-8", xml_declaration=True)

    print("Script execution completed successfully.")

except Exception as e:
    # Top-level boundary: report anything unexpected instead of a traceback
    print("An unexpected error occurred:", str(e))