import requests

from bs4 import BeautifulSoup
from django.core.exceptions import ValidationError
from django.core.validators import URLValidator
from requests.exceptions import RequestException


def is_valid_url(url):
    """Return True if the URL is well-formed and currently reachable."""
    try:
        URLValidator()(url)
        requests.get(url).raise_for_status()
        return True
    except ValidationError:
        print(url + " is not a valid url")
        return False
    except RequestException as e:
        print(url + " led to exception: " + str(e))
        return False


def find_longest_common_string(s1, s2):
    """Return the longest substring shared by s1 and s2, e.g. "apple pie"."""
    from difflib import SequenceMatcher
    match = SequenceMatcher(None, s1, s2).find_longest_match(0, len(s1), 0, len(s2))
    return s1[match.a: match.a + match.size]


def fetch_feed(feed, limit=10):
    """Scrape up to `limit` items from the page described by `feed`.

    `feed` must provide a URL plus CSS selectors for the item element and
    for its title, content, date, author and link parts.
    """
    items = []
    rep = requests.get(feed.url).text
    soup = BeautifulSoup(rep, "html.parser")
    elements = soup.select(feed.element)
    for element in elements:
        if len(items) >= limit:
            break
        # Title and content are mandatory: abort the whole fetch if missing.
        try:
            title = element.select(feed.title)[0].text
        except Exception:
            return False
        try:
            content = element.select(feed.content)[0].text
        except Exception:
            return False
        # Date, author and link are optional and default to False.
        try:
            date = element.select(feed.date)[0].text
        except Exception:
            date = False
        try:
            author = element.select(feed.author)[0].text
        except Exception:
            author = False
        try:
            link = element.select(feed.link)[0]["href"]
        except Exception:
            link = False
        items.append({"title": title, "content": content, "pubDate": date,
                      "author": author, "link": link})
    return {"title": soup.find("title").text, "items": items}
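

# A minimal usage sketch, assuming a feed-like object that exposes the
# attributes fetch_feed reads (url, element, title, content, date, author,
# link). SimpleNamespace stands in for whatever model the real code uses;
# the URL and selectors below are placeholders, not part of the original.
if __name__ == "__main__":
    from types import SimpleNamespace

    demo_feed = SimpleNamespace(
        url="https://example.com/blog",  # placeholder URL
        element="article",               # CSS selector matching one item
        title="h2",                      # selector for the item title
        content="p",                     # selector for the item body
        date="time",                     # selector for the publication date
        author=".author",                # selector for the author name
        link="a",                        # selector whose href is the item link
    )
    if is_valid_url(demo_feed.url):
        print(fetch_feed(demo_feed, limit=5))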