hrss/web/views.py

150 lines
5.4 KiB
Python

from django.shortcuts import render, redirect
from django.http import HttpResponse
from .utils import *
from .models import Feed
from django.db.models import ObjectDoesNotExist
from bs4 import BeautifulSoup
import logging
from urllib.parse import quote_plus, unquote_plus
import traceback
import sys
# Create your views here.
# Module-level logger shared by every view in this file; it is named after
# the module's import path so it can be configured per module.
logger = logging.getLogger(__name__)
def _proxy_root_relative(tag, attr, scheme, host):
    """Rewrite a root- or protocol-relative URL in ``tag[attr]`` to go through /iframe/."""
    value = tag.get(attr)
    if not value or not value.startswith("/"):
        # Absolute and document-relative URLs are left untouched.
        return
    if value.startswith("//"):
        # Protocol-relative URL: it already names its own host, so only the
        # scheme of the proxied page has to be supplied.
        tag[attr] = "/iframe/" + scheme + ":" + value
    else:
        tag[attr] = "/iframe/" + scheme + "://" + host + value


def iframe(request, encodedurl):
    """Fetch a remote URL and serve its content back through this site.

    ``encodedurl`` is decoded with ``unquote_plus`` and fetched with the
    project's ``get_url`` helper. When the upstream response has no
    Content-Type, or one starting with ``text/html``, the document is parsed
    with BeautifulSoup and adjusted:

    * every ``<a>`` gets ``href="javascript:void(0)"`` so links are inert;
    * every ``<script>`` element is removed;
    * ``<link href>`` and ``<img src>`` values starting with ``/`` are
      rewritten to pass through the ``/iframe/`` route.

    Any other content type is returned unmodified.

    Returns:
        An ``HttpResponse`` carrying the (possibly rewritten) body and the
        upstream Content-Type, or a plain 500 response if anything fails.
    """
    # NOTE(review): this changes a process-wide limit on every request.
    # Presumably it is here because parsing/serialising deeply nested
    # documents recurses heavily -- confirm before moving or removing it.
    sys.setrecursionlimit(10000)
    try:
        url = unquote_plus(encodedurl)
        req = get_url(url)
        html = req.content
        # Use None (not False) when the header is absent so that HttpResponse
        # falls back to its own default instead of emitting a bogus value.
        content_type = req.headers["Content-Type"] if "Content-Type" in req.headers else None
        if not content_type or content_type.startswith("text/html"):
            logger.debug("No content-type or content-type ~= '^text/html'")
            bs = BeautifulSoup(html, 'html.parser')
            base_scheme = url.split("://")[0]
            base_url = url.split("//")[-1].split("/")[0].split('?')[0]
            logger.debug("URL: "+base_scheme+"://"+base_url)

            # Neutralise links so that clicking inside the frame never navigates.
            for a in bs.find_all("a"):
                a["href"] = "javascript:void(0)"

            # Route root-relative stylesheets (and other <link> targets)
            # through the proxy.
            for link in bs.find_all("link", {"href": True}):
                _proxy_root_relative(link, "href", base_scheme, base_url)

            # Drop all JavaScript. No src rewriting is needed afterwards
            # because no <script> element survives this loop.
            for script in bs.find_all("script"):
                script.extract()

            # Route root-relative images through the proxy.
            for img in bs.find_all("img", {"src": True}):
                _proxy_root_relative(img, "src", base_scheme, base_url)

            html = str(bs)
    except Exception:
        # Top-level boundary of the view: record the traceback and answer
        # with a real 500 status rather than a 200 carrying an error text.
        logger.exception("Failed to proxy %r", encodedurl)
        return HttpResponse("An error has occured", status=500)
    return HttpResponse(html, content_type=content_type or None)
def dummy(request):
    """Placeholder view that answers every request with the body ``toto``."""
    response = HttpResponse("toto")
    return response
def homepage(request):
    """Render the landing page and handle submission of its URL form.

    A POST carrying a non-empty ``url`` field either redirects to the
    ``setup`` route (when ``is_valid_url`` accepts the value) or re-renders
    the page with the submitted value and an error message. Every other
    request simply gets the plain homepage.
    """
    is_submission = (
        request.method == 'POST'
        and "url" in request.POST
        and request.POST["url"]
    )
    if not is_submission:
        return render(request, 'homepage.html')

    url = request.POST["url"]
    if not is_valid_url(url):
        context = {"url": url, "error": url+" is not a valid URL."}
        return render(request, 'homepage.html', context)

    return redirect("setup", encodedurl=quote_plus(url))
def setup(request, encodedurl):
    """Show the feed set-up page for the URL carried in ``encodedurl``.

    The value is decoded with ``unquote_plus``; if ``is_valid_url`` rejects
    the decoded URL the visitor is redirected to the homepage instead.
    """
    target = unquote_plus(encodedurl)
    if not is_valid_url(target):
        return redirect("homepage")

    context = {"encodedurl": encodedurl, "url": target}
    return render(request, 'setup.html', context)
def _strip_element_prefix(element, value):
    """Return ``value`` with the part it shares with ``element`` removed."""
    if not value:
        # Slicing an empty string always yields "", so skip the helper call.
        return ""
    common = find_longest_common_string(element, value)
    # NOTE(review): the extra 3 characters presumably skip a separator that
    # follows the shared part (e.g. " > ") -- confirm against the form that
    # posts these values.
    return value[len(common)+3:].strip()


def newfeed(request):
    """Create a ``Feed`` from the posted set-up form and go to the feed list.

    Required POST fields: ``url``, ``element``, ``title``, ``content``.
    Optional POST fields: ``date``, ``author``, ``link`` (stored as an empty
    string when absent).

    Every field except ``url`` and ``element`` is stored with the part it has
    in common with ``element`` stripped off.

    Returns:
        A redirect to ``feeds`` after saving, a 400 response when a required
        field is missing or empty, or a redirect to ``homepage`` for
        non-POST requests.
    """
    if request.method != 'POST':
        return redirect("homepage")

    required = ("url", "element", "title", "content")
    if not all(request.POST.get(name) for name in required):
        # Same message as before, but flagged as a client error like the
        # other views in this module do.
        return HttpResponse("Error, missing required element", status=400)

    url = request.POST["url"]
    element = request.POST["element"]

    # Optional fields default to "" instead of failing on a missing key.
    title = _strip_element_prefix(element, request.POST["title"])
    content = _strip_element_prefix(element, request.POST["content"])
    date = _strip_element_prefix(element, request.POST.get("date", ""))
    author = _strip_element_prefix(element, request.POST.get("author", ""))
    link = _strip_element_prefix(element, request.POST.get("link", ""))

    feed = Feed(url=url, element=element, title=title, content=content, date=date, author=author, link=link)
    feed.save()
    return redirect("feeds")
def feeds(request):
    """Render ``feeds.html`` with every stored ``Feed``."""
    context = {"feeds": Feed.objects.all()}
    return render(request, "feeds.html", context)
def feed_delete(request, id):
    """Delete the feed whose primary key is ``id`` and return to the list.

    Deletion is refused with a 403 when the request host is the demo
    website. An unknown ``id`` is ignored and the visitor is still
    redirected to the feed list.
    """
    # demo website: disable deleting feeds
    if request.get_host() == "hrss.hipstercat.fr:443":
        return HttpResponse("Deleting is disabled on demo website.", status=403)

    try:
        Feed.objects.get(pk=id).delete()
    except ObjectDoesNotExist:
        # Nothing to delete; fall through to the same redirect.
        pass
    else:
        # Lazy %-formatting works whether the URL conf passes ``id`` as a
        # string or as an integer; concatenation would fail on an integer.
        logger.info("Removed feed ID %s", id)
    return redirect("feeds")
def rss(request, uurl):
    """Serve the RSS document for the feed identified by ``uurl``.

    Responds with 404 when no such feed exists, with 422 when fetching the
    feed yields nothing or no items, and otherwise renders ``feed.xml`` as
    ``application/rss+xml``.
    """
    try:
        feed = Feed.objects.get(uurl=uurl)
        fetched = fetch_feed(feed)
        if fetched and fetched["items"]:
            context = {"feed": feed, "rss": fetched}
            return render(request, "feed.xml", context, content_type="application/rss+xml")
        return HttpResponse("Error: feed is empty. Did you set up 'element' field correctly?", status=422)
    except ObjectDoesNotExist:
        return HttpResponse("Error: feed is unknown", status=404)