Files
MoodleScraper/main.py

57 lines
2.3 KiB
Python

import requests
from bs4 import BeautifulSoup
from MoodleClasses.MoodleCourse import MoodleCourse
def get_moodle_auth_cookie(s: requests.Session, url: str) -> requests.Session:
    """Authenticate the given session against the Tequila login endpoint.

    First requests ``{url}login/index.php`` with the session, then posts a
    login form to ``https://tequila.epfl.ch/cgi-bin/tequila/login``.

    The credentials come from the module-level ``args`` namespace
    (``args.username`` / ``args.password``), which is only assigned when this
    file is executed as a script.

    Args:
        s: Session used for both requests; it is mutated in place.
        url: Base Moodle URL; it is concatenated directly with
            ``login/index.php``, so it is expected to end with ``/``.

    Returns:
        The same session object that was passed in.

    Raises:
        KeyError: If the session holds no ``TequilaPHP`` cookie after the
            first request.
    """
    login_page = f"{url}login/index.php"
    # NOTE(review): presumably this request is what stores the "TequilaPHP"
    # cookie read below -- confirm against the actual redirect chain.
    s.get(login_page)

    login_form = {
        "requestkey": s.cookies["TequilaPHP"],
        "username": args.username,
        "password": args.password,
    }
    # The response is not inspected, so a rejected login is not detected here.
    s.post("https://tequila.epfl.ch/cgi-bin/tequila/login", data=login_form)
    return s
def get_courses(s: requests.Session, url: str) -> list[MoodleCourse]:
    """Build a ``MoodleCourse`` for every course link on the profile page.

    Fetches ``{url}user/profile.php?showallcourses=1`` and reads the anchors
    inside the second ``<section>`` matched by the class filter below.

    Args:
        s: Session used to fetch the profile page.
        url: Base Moodle URL; concatenated directly with the page paths, so
            it is expected to end with ``/``.

    Returns:
        One ``MoodleCourse(name, course_url)`` per anchor, in page order.

    Raises:
        IndexError: If fewer than two sections match, or if an href has
            fewer than two ``&``-separated parts or no ``=`` in the
            selected part.
        KeyError: If an anchor has no ``href`` attribute.
    """
    response = s.get(f"{url}user/profile.php?showallcourses=1")
    page = BeautifulSoup(response.text, 'html.parser')

    matching_sections = page.find_all(
        "section", {"class": "node_category card d-inline-block w-100 mb-3"}
    )
    # The course links are taken from the second matching section.
    anchors = matching_sections[1].find_all("a")

    # Each stage is completed before the next one starts.
    course_names = [anchor.text.strip() for anchor in anchors]
    hrefs = [anchor["href"] for anchor in anchors]

    # The id is the text after the first '=' in the second-to-last
    # '&'-separated piece of the href.
    # NOTE(review): this is positional and depends on the parameter order in
    # the link -- confirm against real profile-page hrefs.
    course_urls = [
        f"{url}course/view.php?id={href.split('&')[-2].split('=')[1]}"
        for href in hrefs
    ]

    return [
        MoodleCourse(name, course_url)
        for name, course_url in zip(course_names, course_urls)
    ]
def download_year(target_url: str) -> None:
    """Log in to Moodle and try to download every activity of every course.

    Opens a fresh session, authenticates it with ``get_moodle_auth_cookie``,
    lists the courses with ``get_courses`` and then walks
    course -> section -> activity, calling ``find`` and ``download`` on each
    activity. Failures are reported on stdout and do not stop the run.

    Reads ``storage_dir``, ``academic_year`` and ``ignore_extension`` from the
    module-level ``args`` namespace, which is only assigned when this file is
    executed as a script.

    Args:
        target_url: Base Moodle URL passed on to the login and course-listing
            helpers.
    """
    # Using the session as a context manager guarantees its pooled
    # connections are released, even if a course or activity raises.
    with requests.Session() as new_session:
        s = get_moodle_auth_cookie(new_session, target_url)
        courses = get_courses(s, target_url)

        for course in courses:
            course.init(s)
            print(target_url, course.name)
            course.set_parent_dir(args.storage_dir, args.academic_year)

            for section in course.sections:
                for activity in section.activities:
                    # Guard clause: an activity that cannot be found is
                    # reported and skipped without attempting a download.
                    if not activity.find(s):
                        print(f"Could not find: {activity}")
                        continue
                    if not activity.download(s, args.ignore_extension):
                        print(f"Could not download: {activity}")
if __name__ == "__main__":
    import argparse

    # (flags, keyword options) for each command-line option, registered in
    # this order.
    option_table = (
        (("--username", "-u"), {}),
        (("--password", "-p"), {}),
        (("--moodle_url", "--url", "-m"), {"default": "https://moodle.epfl.ch/"}),
        (("--academic_year", "--year", "-y"), {"default": None}),
        (("--storage_dir", "--dir", "-d"), {"default": "."}),
        (("--ignore_extension", "-i"), {"nargs": "*"}),
    )

    parser = argparse.ArgumentParser(description="Moodle to download")
    for flags, options in option_table:
        parser.add_argument(*flags, **options)

    # `args` must stay a module-level name: get_moodle_auth_cookie and
    # download_year read it as a global. Assigning it here, at module scope,
    # already makes it global, so no `global` statement is required.
    args = parser.parse_args()
    download_year(args.moodle_url)