"""Download course material (resources, folders, assignment files) from Moodle.

The functions in this module read the command-line options from the
module-level ``args`` namespace, which is only created when the file is run
as a script (see the ``__main__`` section at the bottom).
"""

import argparse
import os
from urllib.parse import unquote

import pathvalidate
import requests
from bs4 import BeautifulSoup

from MoodleCourse import MoodleCourse

# Size of the pieces read from a streamed HTTP response.
_CHUNK_SIZE = 8192


def get_moodle_auth_cookie(s: requests.Session, url: str) -> requests.Session:
    """Log the session in through the Tequila login form.

    The Moodle login page is requested first so that the ``TequilaPHP``
    cookie is set on the session; that cookie is then posted as the
    ``requestkey`` together with ``args.username`` / ``args.password``.

    Args:
        s: Session that will carry the authentication cookies.
        url: Base URL of the Moodle instance, ending with ``/``.

    Returns:
        The same session object, for convenience.
    """
    s.get(f"{url}login/index.php")
    credentials = {
        "requestkey": s.cookies["TequilaPHP"],
        "username": args.username,
        "password": args.password,
    }
    s.post("https://tequila.epfl.ch/cgi-bin/tequila/login", data=credentials)
    return s


def get_courses(s: requests.Session, url: str) -> list[MoodleCourse]:
    """Build a ``MoodleCourse`` for every course linked on the profile page.

    Args:
        s: Authenticated session.
        url: Base URL of the Moodle instance, ending with ``/``.

    Returns:
        One ``MoodleCourse(name, course_url)`` per link found in the second
        ``node_category`` section of the profile page.
    """
    r = s.get(f"{url}user/profile.php?showallcourses=1")
    soup = BeautifulSoup(r.text, 'html.parser')
    # The course list is the second card with this exact class string.
    course_node = soup.find_all(
        "section", {"class": "node_category card d-inline-block w-100 mb-3"}
    )[1]

    courses = []
    for link in course_node.find_all("a"):
        # The course id is the value of the second-to-last query parameter.
        course_id = link["href"].split('&')[-2].split('=')[1]
        courses.append(
            MoodleCourse(link.text.strip(), f"{url}course/view.php?id={course_id}")
        )
    return courses


def _target_path(directory, filename):
    """Join *filename* onto *directory* and sanitize the resulting path."""
    return pathvalidate.sanitize_filepath(os.path.join(directory, filename))


def _stream_to_file(response, file):
    """Write the body of *response* to *file* via a temporary ``.part`` file.

    The data is first written to ``file + ".part"`` and renamed once the
    transfer is complete, so an interrupted download never leaves a
    truncated file under the final name. On any error the partial file is
    removed and the error is re-raised.
    """
    part = file + ".part"
    try:
        with open(part, 'wb') as f:
            for chunk in response.iter_content(chunk_size=_CHUNK_SIZE):
                f.write(chunk)
        # Rename only after the file handle is closed.
        os.rename(part, file)
    except BaseException:
        if os.path.exists(part):
            os.remove(part)
        raise


def download_resource(s, activity):
    """Download the file behind a ``resource`` activity.

    The file is stored in ``<activity.parent_dir>/<activity.name>/`` under the
    (URL-decoded) last path component of ``activity.url``.

    Args:
        s: Authenticated session.
        activity: Activity providing ``parent_dir``, ``name`` and ``url``.

    Returns:
        ``True`` if the file was downloaded, already exists, or has an
        extension listed in ``args.ignore_extension``; ``False`` if the
        download failed.
    """
    directory = pathvalidate.sanitize_filepath(
        os.path.join(activity.parent_dir, activity.name)
    )
    file = _target_path(directory, unquote(activity.url.split('/')[-1]))

    if args.ignore_extension is not None and str(file).split(".")[-1] in args.ignore_extension:
        return True
    if os.path.exists(file):
        return True

    try:
        os.makedirs(directory, exist_ok=True)
        with s.get(activity.url, stream=True) as r:
            # Do not store an HTTP error page as if it were the resource.
            r.raise_for_status()
            _stream_to_file(r, file)
    except (requests.RequestException, OSError):
        return False
    return True


def download_folder(s, activity):
    """Download the archive behind a ``folder`` activity.

    The file name is taken from the part of the ``Content-Disposition``
    response header that follows the last single quote.

    Args:
        s: Authenticated session.
        activity: Activity providing ``parent_dir``, ``name`` and ``url``.

    Returns:
        ``True`` if the file was downloaded or already exists; ``False`` if
        the request failed, the header is missing, or writing failed.
    """
    directory = pathvalidate.sanitize_filepath(
        os.path.join(activity.parent_dir, activity.name)
    )
    try:
        with s.get(activity.url, stream=True) as r:
            r.raise_for_status()
            filename = unquote(r.headers['Content-Disposition'].split('\'')[-1])
            file = _target_path(directory, filename)
            if os.path.exists(file):
                return True
            os.makedirs(directory, exist_ok=True)
            _stream_to_file(r, file)
    except (requests.RequestException, OSError, KeyError):
        # KeyError: the response carried no Content-Disposition header.
        return False
    return True


def download_assign(s, activity):
    """Download every submitted file listed on an ``assign`` activity page.

    Each ``div.fileuploadsubmission`` on the page is expected to contain a
    link; the link target (without its query string) is downloaded and saved
    under the link text. Files that already exist are skipped, and a failure
    on one file does not prevent the remaining files from being tried.

    Args:
        s: Authenticated session.
        activity: Activity providing ``parent_dir``, ``name`` and ``url``.

    Returns:
        ``True`` if every file was downloaded or already present, ``False``
        if at least one download failed.
    """
    page = s.get(activity.url)
    soup = BeautifulSoup(page.text, 'html.parser')
    directory = pathvalidate.sanitize_filepath(
        os.path.join(activity.parent_dir, activity.name)
    )

    all_ok = True
    for candidate in soup.find_all("div", {"class": "fileuploadsubmission"}):
        link = candidate.find("a")
        if link is None or not link.get("href"):
            # Nothing to download in this submission block.
            continue
        url = link["href"].split('?')[0]
        file = _target_path(directory, unquote(link.text))
        if os.path.exists(file):
            # Skip only this file; later submissions may still be missing.
            continue
        try:
            os.makedirs(directory, exist_ok=True)
            with s.get(url, stream=True) as r:
                r.raise_for_status()
                _stream_to_file(r, file)
        except (requests.RequestException, OSError):
            all_ok = False
    return all_ok


def download_year(target_url: str):
    """Download all supported activities of all courses of one Moodle site.

    Logs in, lists the courses, and walks every section and activity of each
    course. Activities of type ``resource``, ``folder`` and ``assign`` are
    downloaded; other types are ignored. Problems are reported on stdout.

    Args:
        target_url: Base URL of the Moodle instance, ending with ``/``.
    """
    s = requests.Session()
    s = get_moodle_auth_cookie(s, target_url)
    courses = get_courses(s, target_url)

    for course in courses:
        course.init(s)
        print(target_url, course.name)
        course.set_parent_dir(args.storage_dir, args.academic_year)
        for section in course.sections:
            for activity in section.activities:
                if activity.type == "resource":
                    if not activity.find_file(s):
                        print("Could not find: ", activity.url)
                    elif not download_resource(s, activity):
                        print("Could not download: ", activity.url)
                elif activity.type == "folder":
                    if not activity.find_folder(s):
                        print("Could not find: ", activity.url)
                    elif not download_folder(s, activity):
                        print("Could not download: ", activity.url)
                elif activity.type == "assign":
                    if not download_assign(s, activity):
                        print("Could not download: ", activity.url)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Moodle to download')
    parser.add_argument('--username', '-u')
    parser.add_argument('--password', '-p')
    parser.add_argument('--moodle_url', '--url', '-m', default='https://moodle.epfl.ch/')
    parser.add_argument('--academic_year', '--year', '-y', default=None)
    parser.add_argument('--storage_dir', '--dir', '-d', default='.')
    parser.add_argument('--ignore_extension', '-i', nargs='*')

    # Module-level name: the functions above read their options from it.
    args = parser.parse_args()

    download_year(args.moodle_url)