hrss/web/utils.py

58 lines
1.7 KiB
Python

import requests
from django.core.validators import URLValidator
from django.core.exceptions import ValidationError
from requests.exceptions import RequestException
from bs4 import BeautifulSoup
def is_valid_url(url, timeout=10):
    """Return True if *url* is well-formed and answers a GET with a success status.

    The URL is first checked with Django's ``URLValidator`` and then fetched
    with ``requests``; ``raise_for_status()`` turns 4xx/5xx answers into a
    failure.

    Args:
        url: The URL to check.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        True when validation and the request both succeed, False otherwise.
        Failures are reported on stdout instead of being raised.
    """
    try:
        URLValidator()(url)
        # Without a timeout, requests waits indefinitely on an unresponsive
        # host; a timeout surfaces as a RequestException handled below.
        requests.get(url, timeout=timeout).raise_for_status()
    except ValidationError:
        # format() instead of "+" so a non-string url cannot raise TypeError
        # while the failure is being reported.
        print("{} is not a valid url".format(url))
        return False
    except RequestException as e:
        print("{} led to exception: {}".format(url, e))
        return False
    return True
def find_longest_common_string(s1, s2):
    """Return the longest contiguous substring shared by *s1* and *s2*.

    Args:
        s1: First string.
        s2: Second string.

    Returns:
        The longest common substring, taken from *s1*; an empty string when
        the inputs share no characters.
    """
    from difflib import SequenceMatcher

    # autojunk must be disabled: with the default heuristic, characters that
    # are frequent in a second string of 200+ items are ignored when seeding
    # matches, so the reported match can be shorter than the real one.
    matcher = SequenceMatcher(None, s1, s2, autojunk=False)
    match = matcher.find_longest_match(0, len(s1), 0, len(s2))
    return s1[match.a:match.a + match.size]
def _first_text_or_false(node, selector):
    """Return the text of the first match of *selector* under *node*, else False."""
    try:
        return node.select(selector)[0].text
    except Exception:
        # Best-effort lookup: no match or an unusable selector both count
        # as "missing".
        return False


def fetch_feed(feed, limit=10, timeout=10):
    """Download ``feed.url`` and extract up to *limit* items from the page.

    Args:
        feed: Object exposing ``url`` plus the selectors ``element``,
            ``title``, ``content``, ``date``, ``author`` and ``link``, each
            of which is passed to BeautifulSoup's ``select``.
        limit: Maximum number of items to collect.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        ``{"title": <page title>, "items": [...]}`` where every item is a dict
        with the keys ``title``, ``content``, ``pubDate``, ``author`` and
        ``link``. Optional fields that cannot be extracted are False, and so
        is the page title when the document has no ``<title>`` tag.
        Returns False as soon as an element lacks a title or content match.
    """
    # Without a timeout, requests waits indefinitely on an unresponsive host.
    response = requests.get(feed.url, timeout=timeout)
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(response.text, "html.parser")

    items = []
    for element in soup.select(feed.element):
        if len(items) >= limit:
            break

        # Title and content are mandatory: one bad element aborts the fetch.
        title = _first_text_or_false(element, feed.title)
        if title is False:
            return False
        content = _first_text_or_false(element, feed.content)
        if content is False:
            return False

        try:
            link = element.select(feed.link)[0]["href"]
        except Exception:
            link = False

        items.append({
            "title": title,
            "content": content,
            "pubDate": _first_text_or_false(element, feed.date),
            "author": _first_text_or_false(element, feed.author),
            "link": link,
        })

    # find() returns None when the page has no <title>; report that as False
    # like the other missing fields instead of raising AttributeError.
    page_title = soup.find("title")
    return {
        "title": page_title.text if page_title is not None else False,
        "items": items,
    }