Files
MoodleScraper/MoodleClasses/MoodleResource.py

62 lines
2.0 KiB
Python

from MoodleClasses.MoodleActivity import MoodleActivity
from MoodleClasses.MoodleFile import MoodleFile
from bs4 import BeautifulSoup
import requests
class MoodleResource(MoodleActivity):
    """Moodle activity of the resource kind, which resolves to a single file.

    The activity is built from the HTML fragment describing it on a course
    page. ``find`` follows the activity link to discover the real file
    location and ``download`` delegates the transfer to the resulting
    ``MoodleFile``.
    """

    def __init__(self, soup: BeautifulSoup):
        """Initialise the resource from its HTML fragment.

        Args:
            soup: Parsed HTML of the activity, forwarded to ``MoodleActivity``.
        """
        super().__init__(soup)
        self.url = self._get_url()
        # Not assigned anywhere in this class; presumably filled in by the
        # code that places the resource in a directory tree -- TODO confirm.
        self.parent_dir = None
        # Set by find() once the underlying file has been located.
        self.file = None

    def _get_url(self) -> "str | None":
        """Return the href of the first anchor in the fragment, if any.

        As a side effect the URL is removed from ``self.all_urls`` when it is
        present there. NOTE(review): ``self.soup`` and ``self.all_urls`` are
        not defined in this class; they are assumed to come from
        ``MoodleActivity`` -- confirm against the base class.

        Returns:
            The link target, or ``None`` when the fragment has no anchor or
            the anchor carries no ``href`` attribute.
        """
        link = self.soup.find("a")
        if link is None:
            return None
        # .get() instead of ["href"]: an anchor without href must not make
        # the constructor blow up with KeyError.
        url = link.get("href")
        if url is None:
            return None
        if url in self.all_urls:
            self.all_urls.remove(url)
        return url

    @staticmethod
    def _locate_file_url(page: BeautifulSoup) -> "str | None":
        """Pick the file location out of a resource ``view.php`` page.

        Inside ``<section id="region-main">`` the first anchor wins; an image
        is only considered when there is no anchor at all. Without that
        section the page is treated as a frameset and the second frame's
        source is used.

        Returns:
            The candidate URL, or ``None`` when nothing usable was found.
        """
        main_region = page.find("section", {"id": "region-main"})
        if main_region is None:
            frames = page.find_all("frame")
            return frames[1].get("src") if len(frames) > 1 else None

        link = main_region.find("a")
        if link is not None:
            return link.get("href")

        img = main_region.find("img")
        return img.get("src") if img is not None else None

    def find(self, s: requests.Session) -> bool:
        """Resolve the activity URL to a concrete file and store it.

        Args:
            s: Session used to request ``self.url`` (redirects are followed).

        Returns:
            ``True`` when ``self.file`` was set, ``False`` when the resource
            has no URL or no file location could be extracted from the page.
        """
        if self.url is None:
            return False

        r = s.get(self.url, allow_redirects=True)

        if "view.php?id=" not in r.url:
            # We were redirected away from the viewer page, so the final URL
            # (minus its query string) is taken as the file itself.
            self.file = MoodleFile(self, r.url.split("?")[0])
            return True

        try:
            page = BeautifulSoup(r.text, "html.parser")
            target = self._locate_file_url(page)
            if target is None:
                return False
            self.file = MoodleFile(self, target)
        except Exception:
            # Deliberate best effort: a page that cannot be parsed or turned
            # into a MoodleFile counts as "not found" rather than aborting
            # the whole scrape.
            return False
        return True

    def download(self, s: requests.Session, ignore_extension: list[str]) -> bool:
        """Download the located file.

        Args:
            s: Session passed through to ``MoodleFile.download``.
            ignore_extension: Passed through to ``MoodleFile.download``.

        Returns:
            The result of ``MoodleFile.download``, or ``False`` when no file
            has been located yet (``find`` not called or unsuccessful).
        """
        if self.file is None:
            return False
        return self.file.download(s, ignore_extension)