"""Object model for a scraped Moodle course page: course, sections, activities."""

import os
from typing import Optional
from urllib.parse import unquote

import bs4
import requests
from bs4 import BeautifulSoup

# Activity names and section titles are truncated to this many characters.
_MAX_NAME_LENGTH = 50


class MoodleActivity:
    """One activity entry of a course section, parsed from its HTML element.

    Attributes:
        soup: The element this activity was built from.
        name: Link text without the hidden ``accesshide`` label, URL-unquoted
            and truncated; ``None`` when the element contains no link.
        url: ``href`` of the first link; ``find_file`` / ``find_folder`` may
            replace it with a resolved target. ``None`` without a link.
        type: Third CSS class of the element, or ``None`` if unavailable.
        parent_dir: Path assigned through ``set_parent_dir``.
        all_urls: ``href`` values of the links in the element; after
            ``init()`` one occurrence of the primary URL has been removed.
    """

    def __init__(self, soup: bs4.element.Tag):
        self.soup = soup
        self.name = None
        self.url = None
        self.type = None
        self.parent_dir = None
        self.all_urls = self._get_all_urls()
        # Elements without any link are left with name/type/url set to None.
        if self.all_urls:
            self.init()

    def __repr__(self):
        return f"{self.type}: {self.name}"

    def init(self):
        """Populate ``name``, ``type`` and ``url`` from the element."""
        self.name = self._get_name()
        self.type = self._get_type()
        self.url = self._get_url()

    def _first_link(self):
        """Return the first ``<a>`` that carries an ``href`` (or ``None``)."""
        return self.soup.find("a", href=True)

    def _get_all_urls(self) -> list:
        """Return the ``href`` of every link in the element.

        Anchors without an ``href`` attribute are skipped; indexing them
        would raise ``KeyError`` and abort construction of the activity.
        """
        return [a["href"] for a in self.soup.find_all("a", href=True)]

    def _get_name(self) -> str:
        """Return the link text that precedes the hidden ``accesshide`` span.

        The cut is made at the span's position in the tree rather than by
        searching for its text, so a name that happens to contain the same
        words as the hidden label is not truncated early, and an empty
        hidden span does not wipe the name.
        """
        link = self._first_link()
        hidden = link.find("span", {"class": "accesshide"})
        visible = []
        for piece in link.strings:
            # Stop at the first string that lives inside the hidden span.
            if hidden is not None and any(p is hidden for p in piece.parents):
                break
            visible.append(piece)
        # unquote() never lengthens its input, so a plain slice is enough.
        return unquote("".join(visible).strip())[:_MAX_NAME_LENGTH]

    def _get_url(self) -> str:
        """Return the primary URL and drop one occurrence of it from ``all_urls``."""
        url = self._first_link()["href"]
        if url in self.all_urls:
            self.all_urls.remove(url)
        return url

    def _get_type(self) -> Optional[str]:
        """Return the third CSS class of the element, or ``None`` if absent."""
        classes = self.soup.get("class") or []
        return classes[2] if len(classes) > 2 else None

    def set_parent_dir(self, parent_dir: str):
        """Store the directory path associated with this activity."""
        self.parent_dir = parent_dir

    @staticmethod
    def _find_embedded_target(page) -> Optional[str]:
        """Return the target URL found in a parsed ``view.php`` page.

        Inside ``<section id="region-main">`` the first link wins, then the
        first image. Without that section, the second ``<frame>`` is used.
        Returns ``None`` when no candidate exists; a candidate lacking its
        ``href`` / ``src`` attribute raises ``KeyError``.
        """
        main_region = page.find("section", {"id": "region-main"})
        if main_region is None:
            frames = page.find_all("frame")
            return frames[1]["src"] if len(frames) > 1 else None
        link = main_region.find("a")
        if link is not None:
            return link["href"]
        img = main_region.find("img")
        return img["src"] if img is not None else None

    def find_file(self, s: requests.Session) -> bool:
        """Resolve ``self.url`` to the file it points at.

        Requests ``self.url`` following redirects. If the final URL is not a
        ``view.php?id=`` page, it becomes ``self.url`` with its query string
        removed. Otherwise the page is parsed and searched for a target.

        Args:
            s: Session used for the request.

        Returns:
            ``True`` if ``self.url`` was updated, ``False`` if no target was
            found or the page could not be processed.
        """
        r = s.get(self.url, allow_redirects=True)
        if "view.php?id=" not in r.url:
            self.url = r.url.split("?")[0]
            return True
        try:
            target = self._find_embedded_target(
                BeautifulSoup(r.text, "html.parser")
            )
        except Exception:
            # Deliberately best-effort: any parsing or lookup problem is
            # reported as "not found" instead of propagating.
            return False
        if target is None:
            return False
        self.url = target
        return True

    def find_folder(self, s: requests.Session) -> bool:
        """Resolve ``self.url`` to a ``download_folder.php`` URL.

        The URL is first built from the ``<form>`` inside the element (its
        ``action`` plus the ``value`` of its first ``<input>``). When the
        form or one of those attributes is missing, ``self.url`` is requested
        instead and ``view.php`` in the final URL is replaced.

        Args:
            s: Session used for the fallback request.

        Returns:
            ``True`` if ``self.url`` was updated, ``False`` otherwise.
        """
        try:
            form = self.soup.find("form")
            url = form["action"] + "?id=" + form.find("input")["value"]
        except (AttributeError, KeyError, TypeError):
            # No usable form in the element: fall back to the activity page.
            r = s.get(self.url, allow_redirects=True)
            if "view.php?id=" in r.url:
                self.url = r.url.replace("view.php", "download_folder.php")
                return True
            return False
        # As before, a match at index 0 of the form action is not accepted.
        if url.find("download_folder.php") > 0:
            self.url = url
            return True
        return False


class MoodleCourseSection:
    """One section of a course page together with its activities.

    Attributes:
        soup: The element this section was built from.
        title: Text of the ``h3.sectionname`` heading, URL-unquoted and
            truncated; empty when the section has no such heading.
        activities: One ``MoodleActivity`` per ``li.activity`` element.
    """

    def __init__(self, soup: bs4.element.Tag):
        self.soup = soup
        self.title = self._get_title()
        self.activities = self._get_activities()

    def __repr__(self):
        return f"{self.title}: {len(self.activities)} activities"

    def _get_title(self) -> str:
        """Return the section heading text, or ``""`` if there is none."""
        heading = self.soup.find("h3", {"class": "sectionname"})
        if heading is None:
            # A section without a heading must not abort the whole parse.
            return ""
        return unquote(heading.text.strip())[:_MAX_NAME_LENGTH]

    def _get_activities(self) -> list[MoodleActivity]:
        """Build a ``MoodleActivity`` for every ``li.activity`` in the section."""
        return [
            MoodleActivity(item)
            for item in self.soup.find_all("li", {"class": "activity"})
        ]

    def set_parent_dir(self, parent_dir: str, index: int):
        """Assign ``<parent_dir>/<index> - <title>`` to every activity."""
        section_dir = os.path.join(parent_dir, f"{index} - {self.title}")
        for act in self.activities:
            act.set_parent_dir(section_dir)


class MoodleCourse:
    """A Moodle course identified by a code, a name and the URL of its page.

    Attributes:
        url: URL of the course page.
        code: First whitespace-separated token of ``name_with_code``
            (empty if the string has no tokens), URL-unquoted.
        name: Remaining tokens joined by single spaces, URL-unquoted.
        soup: Parsed course page; ``None`` until ``init()`` is called.
        sections: Parsed sections; ``None`` until ``init()`` is called.
    """

    def __init__(self, name_with_code: str, url: str):
        self.url = url
        parts = name_with_code.split()
        # An empty or whitespace-only string yields an empty code and name.
        self.code = unquote(parts[0]) if parts else ""
        self.name = unquote(" ".join(parts[1:]))
        self.soup = None
        self.sections = None

    def __repr__(self):
        if self.sections is None:
            # repr() must stay usable before init() has fetched the page.
            return f"{self.code} {self.name}: not loaded"
        return f"{self.code} {self.name}: {len(self.sections)} sections"

    def init(self, s: requests.Session):
        """Fetch the course page with ``s`` and parse its sections."""
        self.soup = self._get_soup(s)
        self.sections = self._get_sections()

    def _get_soup(self, s: requests.Session) -> BeautifulSoup:
        """Request ``self.url`` and return the parsed response body."""
        r = s.get(self.url)
        return BeautifulSoup(r.text, 'html.parser')

    def _get_sections(self) -> list[MoodleCourseSection]:
        """Build a ``MoodleCourseSection`` for every ``li.course-section``."""
        return [
            MoodleCourseSection(item)
            for item in self.soup.find_all("li", {"class": "course-section"})
        ]

    def set_parent_dir(self, parent_dir: str, year: Optional[str] = None):
        """Propagate ``<parent_dir>/<code> - <name>/<year>`` to all sections.

        ``init()`` must have been called first, since the sections are
        iterated.

        Args:
            parent_dir: Base directory.
            year: Sub-directory name. When omitted, the fourth
                ``/``-separated component of ``self.url`` is used, i.e. the
                first path segment of a ``scheme://host/...`` URL.
        """
        if year is None:
            year = self.url.split("/")[3]
        course_dir = os.path.join(
            parent_dir, f"{self.code} - {self.name}", year
        )
        for i, sec in enumerate(self.sections):
            sec.set_parent_dir(course_dir, i)