# (scraper/extraction metadata removed — not part of the source)
import requests
|
|
from django.core.validators import URLValidator
|
|
from django.core.exceptions import ValidationError
|
|
from requests.exceptions import RequestException
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
|
def is_valid_url(url):
    """Return True if *url* is syntactically valid and responds successfully.

    Two checks are performed in order:
      1. Django's ``URLValidator`` for syntactic validity.
      2. A live HTTP GET whose status is checked via ``raise_for_status()``.

    Failures are reported on stdout and yield False.

    :param url: the URL string to check
    :return: True when both checks pass, False otherwise
    """
    try:
        URLValidator()(url)
        # timeout guards against hanging indefinitely on an unresponsive host
        requests.get(url, timeout=10).raise_for_status()
        return True
    except ValidationError:
        print(url + " is not a valid url")
        return False
    except RequestException as e:
        print(url + " led to exception: " + str(e))
        return False
|
|
|
|
|
|
def find_longest_common_string(s1, s2):
    """Return the longest contiguous substring shared by *s1* and *s2*.

    Uses :class:`difflib.SequenceMatcher` with ``autojunk=False``: the
    default junk heuristic kicks in for inputs of 200+ characters and can
    silently return a shorter-than-longest match, which would be wrong for
    this function's contract.

    :return: the longest common substring, or "" when nothing is shared
    """
    from difflib import SequenceMatcher

    matcher = SequenceMatcher(None, s1, s2, autojunk=False)
    match = matcher.find_longest_match(0, len(s1), 0, len(s2))
    return s1[match.a:match.a + match.size]
|
|
|
|
def _select_text(element, selector):
    """Return the text of the first node in *element* matching *selector*,
    or None when the selector matches nothing (or is invalid)."""
    try:
        return element.select(selector)[0].text
    except Exception:
        return None


def fetch_feed(feed, limit=10):
    """Scrape up to *limit* items from the page at ``feed.url``.

    *feed* supplies CSS selectors (``feed.element``, ``feed.title``,
    ``feed.content``, ``feed.date``, ``feed.author``, ``feed.link``) used to
    locate each item and its parts.

    :param feed: object with a ``url`` attribute and the selectors above
    :param limit: maximum number of items to collect (default 10)
    :return: ``{"title": <page title>, "items": [...]}`` on success, where
        each item has keys "title", "content", "pubDate", "author", "link".
        Returns False as soon as a mandatory part (title or content) is
        missing from an item.  Optional parts (date, author, link) fall
        back to False within the item.
    """
    items = []
    # timeout prevents an unresponsive host from blocking indefinitely
    rep = requests.get(feed.url, timeout=10).text
    # explicit parser avoids bs4's "no parser specified" warning and makes
    # parsing consistent across environments
    soup = BeautifulSoup(rep, "html.parser")

    for element in soup.select(feed.element):
        if len(items) >= limit:
            break

        title = _select_text(element, feed.title)
        content = _select_text(element, feed.content)
        if title is None or content is None:
            # mandatory fields: abort the whole feed, as in the original contract
            return False

        date = _select_text(element, feed.date)
        author = _select_text(element, feed.author)
        try:
            link = element.select(feed.link)[0]["href"]
        except Exception:
            link = False

        items.append({
            "title": title,
            "content": content,
            "pubDate": date if date is not None else False,
            "author": author if author is not None else False,
            "link": link,
        })

    # NOTE(review): assumes the page always has a <title> tag — a page
    # without one raises AttributeError here, same as the original.
    return {"title": soup.find("title").text, "items": items}