Changing Moodle activity handling
This commit is contained in:
32
MoodleClasses/MoodleActivity.py
Normal file
32
MoodleClasses/MoodleActivity.py
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
from urllib.parse import unquote
|
||||||
|
import bs4
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
class MoodleActivity:
    """Common state and helpers for one activity entry of a Moodle course page.

    Wraps the markup of a single activity, collects the targets of the links
    found in it, derives a display name, and remembers the directory under
    which the activity's files are stored.
    """

    def __init__(self, soup: bs4.element.Tag):
        """Store *soup* and derive ``all_urls`` and ``name`` from it."""
        self.soup = soup
        self.all_urls = self._get_all_urls()
        self.name = self._get_name()
        # Unknown at construction time; filled in through set_parent_dir().
        self.parent_dir = None

    def __repr__(self):
        return f"{self.parent_dir}/{self.name}"

    def _get_all_urls(self) -> list[str]:
        """Return the ``href`` of every anchor inside the activity markup.

        Anchors without an ``href`` attribute are skipped instead of raising
        ``KeyError``.
        """
        hrefs = (anchor.get("href") for anchor in self.soup.find_all("a"))
        return [href for href in hrefs if href is not None]

    def _get_name(self) -> str:
        """Return the activity name.

        Without any link the first line of the activity text is used as is.
        Otherwise the text of the first link is taken, everything from the
        ``accesshide`` span onwards is dropped, and the result is URL-decoded
        and limited to 50 characters.
        """
        link = self.soup.find("a")
        if link is None:
            return self.soup.text.strip().split("\n")[0].strip()

        text = link.text
        hidden = link.find("span", {"class": "accesshide"})
        hidden_text = "" if hidden is None else hidden.text
        # An empty hidden span would match at index 0 and erase the whole
        # name, so only cut when there is real hidden text to remove.
        cut = text.find(hidden_text) if hidden_text else -1
        name = (text[:cut] if cut >= 0 else text).strip()
        return unquote(name)[:50]

    def set_parent_dir(self, parent_dir: str) -> None:
        """Remember the directory this activity's files are stored under."""
        self.parent_dir = parent_dir
|
||||||
36
MoodleClasses/MoodleAssign.py
Normal file
36
MoodleClasses/MoodleAssign.py
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
from MoodleClasses.MoodleActivity import MoodleActivity
|
||||||
|
from MoodleClasses.MoodleFile import MoodleFile
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from urllib.parse import unquote
|
||||||
|
import requests
|
||||||
|
|
||||||
|
|
||||||
|
class MoodleAssign(MoodleActivity):
    """Assignment activity: locates and downloads the files listed on its page."""

    def __init__(self, soup: BeautifulSoup):
        """Parse the activity markup and remember the assignment page URL."""
        super().__init__(soup)
        self.url = self._get_url()
        self.parent_dir = None
        # MoodleFile objects discovered by find().
        self.files = []

    def _get_url(self) -> str:
        """Return the target of the activity's first link, or ``None``.

        The URL is also removed from ``all_urls`` so that list only keeps the
        remaining links.
        """
        link = self.soup.find("a")
        url = None if link is None else link.get("href")
        if url is None:
            return None
        if url in self.all_urls:
            self.all_urls.remove(url)
        return url

    def find(self, s: requests.Session) -> bool:
        """Fetch the assignment page and collect its ``fileuploadsubmission`` files.

        Returns ``False`` when the activity has no URL, ``True`` otherwise
        (also when the page lists no files).
        """
        if self.url is None:
            return False
        r = s.get(self.url)
        soup = BeautifulSoup(r.text, 'html.parser')

        found = []
        for candidate in soup.find_all("div", {"class": "fileuploadsubmission"}):
            link = candidate.find("a")
            if link is None or link.get("href") is None:
                continue
            moodle_file = MoodleFile(self, link["href"].split('?')[0])
            filename = unquote(link.text).strip()
            if filename:
                # The link text is the displayed file name. The computed path
                # has to be stored, otherwise the URL-derived default stays.
                moodle_file.filepath = moodle_file._get_filepath(filename)
                moodle_file.extension = str(moodle_file.filepath).split(".")[-1]
            found.append(moodle_file)
        # Replace rather than extend so repeated calls do not duplicate files.
        self.files = found
        return True

    def download(self, s: requests.Session, ignore_extension: list[str]) -> bool:
        """Download every collected file; return ``True`` only if all succeeded.

        Every file is attempted even after an earlier one failed.
        """
        results = [file.download(s, ignore_extension) for file in self.files]
        return all(results)
|
||||||
35
MoodleClasses/MoodleCourse.py
Normal file
35
MoodleClasses/MoodleCourse.py
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
from MoodleClasses.MoodleCourseSection import MoodleCourseSection
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from urllib.parse import unquote
|
||||||
|
import requests
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
class MoodleCourse:
    """A Moodle course: its code, name, page URL and parsed sections."""

    def __init__(self, name_with_code: str, url: str):
        """Split *name_with_code* into course code (first word) and name (rest).

        Both parts are URL-decoded. ``soup`` and ``sections`` stay ``None``
        until init() is called.
        """
        self.url = url
        words = name_with_code.split()
        # An empty title yields an empty code instead of an IndexError.
        self.code = unquote(words[0]) if words else ""
        self.name = unquote(" ".join(words[1:]).strip())
        self.soup = None
        self.sections = None

    def __repr__(self):
        # sections is None before init(); report 0 rather than raising.
        return f"{self.code} {self.name}: {len(self.sections or ())} sections"

    def init(self, s: requests.Session) -> None:
        """Download the course page and parse its sections."""
        self.soup = self._get_soup(s)
        self.sections = self._get_sections()

    def _get_soup(self, s: requests.Session) -> BeautifulSoup:
        """Fetch the course page with session *s* and parse it as HTML."""
        r = s.get(self.url)
        return BeautifulSoup(r.text, 'html.parser')

    def _get_sections(self) -> list[MoodleCourseSection]:
        """Wrap every ``<li class="course-section">`` of the page in a MoodleCourseSection."""
        sections_raw = self.soup.find_all("li", {"class": "course-section"})
        return [MoodleCourseSection(raw) for raw in sections_raw]

    def set_parent_dir(self, parent_dir: str, year: str = None) -> None:
        """Assign ``<parent_dir>/<code> - <name>/<year>`` to every section.

        When *year* is not given it is taken from the fourth ``/``-separated
        piece of the course URL. Each section also receives its index.
        Requires init() to have been called.
        """
        if year is None:
            year = self.url.split("/")[3]
        course_dir = os.path.join(parent_dir, f"{self.code} - {self.name}", year)
        for i, sec in enumerate(self.sections):
            sec.set_parent_dir(course_dir, i)
|
||||||
46
MoodleClasses/MoodleCourseSection.py
Normal file
46
MoodleClasses/MoodleCourseSection.py
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
from urllib.parse import unquote
|
||||||
|
from MoodleClasses.MoodleActivity import MoodleActivity
|
||||||
|
from MoodleClasses.MoodleResource import MoodleResource
|
||||||
|
from MoodleClasses.MoodleFolder import MoodleFolder
|
||||||
|
from MoodleClasses.MoodleAssign import MoodleAssign
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from typing import Type
|
||||||
|
import bs4
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
class MoodleCourseSection:
    """One section of a Moodle course page together with its supported activities."""

    def __init__(self, soup: bs4.element.Tag):
        """Store the section markup and parse its title and activities."""
        self.soup = soup
        self.title = self._get_title()
        self.activities = self._get_activities()

    def __repr__(self):
        return f"{self.title}: {len(self.activities)} activities"

    def _get_title(self) -> str:
        """Return the URL-decoded section heading, limited to 50 characters.

        A section without an ``<h3 class="sectionname">`` gets an empty title
        instead of raising ``AttributeError``.
        """
        heading = self.soup.find("h3", {"class": "sectionname"})
        if heading is None:
            return ""
        return unquote(heading.text.strip())[:50]

    def _get_activities(self) -> list[MoodleActivity]:
        """Build one activity object per ``<li class="activity">`` of a supported type.

        Entries that _classify_activity() does not recognise are skipped.
        """
        activities = []
        for raw in self.soup.find_all("li", {"class": "activity"}):
            activity_class = self._classify_activity(raw)
            if activity_class is not None:
                activities.append(activity_class(raw))
        return activities

    @staticmethod
    def _classify_activity(soup: BeautifulSoup) -> "Type[MoodleActivity] | None":
        """Return the class handling this activity, or ``None`` if unsupported.

        The activity type is read from the third CSS class of the element.
        Elements with fewer than three classes are treated as unsupported.
        """
        classes = soup.get("class") or []
        if len(classes) < 3:
            return None
        activitytype = classes[2]
        if activitytype == "resource":
            return MoodleResource
        if activitytype == "folder":
            return MoodleFolder
        if activitytype == "assign":
            return MoodleAssign
        return None

    def set_parent_dir(self, parent_dir: str, index: int) -> None:
        """Assign ``<parent_dir>/<index> - <title>`` to every activity."""
        section_dir = os.path.join(parent_dir, f"{index} - {self.title}")
        for act in self.activities:
            act.set_parent_dir(section_dir)
|
||||||
56
MoodleClasses/MoodleFile.py
Normal file
56
MoodleClasses/MoodleFile.py
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
from MoodleClasses.MoodleActivity import MoodleActivity
|
||||||
|
from urllib.parse import unquote
|
||||||
|
import pathvalidate
|
||||||
|
import requests
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
class MoodleFile:
    """A single downloadable file that belongs to a Moodle activity.

    The target location is ``<activity.parent_dir>/<activity.name>/<file name>``,
    sanitised with ``pathvalidate``.
    """

    def __init__(self, parent_activity: MoodleActivity, url: str):
        """Derive directory, file path and extension from the activity and *url*."""
        self.parent_activity = parent_activity
        self.url = url
        self.directory = self._get_directory()
        self.filepath = self._get_filepath()
        # Text after the last "." of the path; compared against ignore lists.
        self.extension = str(self.filepath).split(".")[-1]

    def _get_directory(self) -> str:
        """Return the sanitised directory ``<parent_dir>/<activity name>``."""
        directory = os.path.join(self.parent_activity.parent_dir, self.parent_activity.name)
        return pathvalidate.sanitize_filepath(directory)

    def _get_filepath(self, filename: str = None) -> str:
        """Return the sanitised path of the file inside ``self.directory``.

        Without *filename* the URL-decoded last segment of the URL is used.
        """
        if filename is None:
            filename = unquote(self.url.split('/')[-1])
        return pathvalidate.sanitize_filepath(os.path.join(self.directory, filename))

    def _decide_download(self, ignore_extension: list[str]) -> bool:
        """Return ``False`` for ignored extensions and for files already on disk."""
        if ignore_extension is not None and self.extension in ignore_extension:
            return False
        return not os.path.exists(self.filepath)

    def _download(self, s: requests.Session) -> bool:
        """Stream the file to disk; return ``True`` on success, ``False`` on failure.

        Data is written to ``<filepath>.part`` and moved into place only once
        the transfer has completed, so an aborted download never leaves a
        truncated file under the final name.
        """
        os.makedirs(self.directory, exist_ok=True)
        part_path = self.filepath + ".part"
        try:
            with s.get(self.url, stream=True) as r:
                # Do not store an HTTP error page as if it were the file.
                r.raise_for_status()
                with open(part_path, 'wb') as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        f.write(chunk)
            # Move only after the file handle has been closed.
            os.replace(part_path, self.filepath)
        except Exception:
            if os.path.exists(part_path):
                os.remove(part_path)
            return False
        # No "finally: return True" here: it would override the failure above.
        return True

    def download(self, s: requests.Session, ignore_extension: list[str]) -> bool:
        """Download the file unless it is ignored or already present.

        Skipped files count as success.
        """
        if not self._decide_download(ignore_extension):
            return True
        return self._download(s)
|
||||||
47
MoodleClasses/MoodleFolder.py
Normal file
47
MoodleClasses/MoodleFolder.py
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
from MoodleClasses.MoodleActivity import MoodleActivity
|
||||||
|
from MoodleClasses.MoodleFile import MoodleFile
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from urllib.parse import unquote
|
||||||
|
import requests
|
||||||
|
|
||||||
|
|
||||||
|
class MoodleFolder(MoodleActivity):
    """Folder activity: downloaded as one file via ``download_folder.php``."""

    def __init__(self, soup: BeautifulSoup):
        """Parse the activity markup; the file is resolved later by find()."""
        super().__init__(soup)
        self.parent_dir = None
        self.file = None

    def _get_url(self) -> str:
        """Return the target of the activity's first link, or ``None``.

        The URL is also removed from ``all_urls``.
        """
        link = self.soup.find("a")
        url = None if link is None else link.get("href")
        if url is None:
            return None
        if url in self.all_urls:
            self.all_urls.remove(url)
        return url

    def _get_form_url(self) -> str:
        """Build the download URL from the inline form, or return ``None``.

        The URL is the form's ``action`` plus ``?id=`` and the ``value`` of
        the form's first ``<input>``. A missing form, input or attribute
        yields ``None``.
        """
        try:
            form = self.soup.find("form")
            return form["action"] + "?id=" + form.find("input")["value"]
        except (AttributeError, KeyError, TypeError):
            return None

    @staticmethod
    def _filename_from_header(header: str) -> str:
        """Extract the URL-decoded file name from a ``Content-Disposition`` value.

        Handles the ``filename*=charset''name`` form (text after the last
        apostrophe) and the plain ``filename="name"`` form. If neither is
        present the whole header value is used.
        """
        if "'" in header:
            raw = header.split("'")[-1]
        elif "filename=" in header:
            raw = header.partition("filename=")[2].split(";")[0].strip().strip('"')
        else:
            raw = header
        return unquote(raw)

    def find(self, s: requests.Session) -> bool:
        """Resolve the folder's download URL and the name of the downloaded file.

        The inline form is tried first. Without a usable form the folder page
        is requested and its ``view.php`` URL is rewritten to
        ``download_folder.php``. The file name comes from the
        ``Content-Disposition`` header of the download response. Returns
        ``False`` whenever one of these steps does not yield a result.
        """
        url = self._get_form_url()
        if url is not None:
            if "download_folder.php" not in url:
                return False
        else:
            page_url = self._get_url()
            if page_url is None:
                return False
            r = s.get(page_url, allow_redirects=True)
            if "view.php?id=" not in r.url:
                return False
            url = r.url.replace("view.php", "download_folder.php")

        self.file = MoodleFile(self, url)
        # stream=True: only the headers are needed here, not the body.
        with s.get(url, allow_redirects=True, stream=True) as rh:
            if 'Content-Disposition' not in rh.headers:
                return False
            filename = self._filename_from_header(rh.headers['Content-Disposition'])
        self.file.filepath = self.file._get_filepath(filename=filename)
        # Keep the extension in step with the real file name; the one derived
        # from the download URL is meaningless.
        self.file.extension = str(self.file.filepath).split(".")[-1]
        return True

    def download(self, s: requests.Session, ignore_extension: list[str]) -> bool:
        """Download the folder file; ``False`` if find() has not resolved it."""
        if self.file is None:
            return False
        return self.file.download(s, ignore_extension)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
61
MoodleClasses/MoodleResource.py
Normal file
61
MoodleClasses/MoodleResource.py
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
from MoodleClasses.MoodleActivity import MoodleActivity
|
||||||
|
from MoodleClasses.MoodleFile import MoodleFile
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import requests
|
||||||
|
|
||||||
|
|
||||||
|
class MoodleResource(MoodleActivity):
    """Resource activity: a single file reached through the activity link."""

    def __init__(self, soup: BeautifulSoup):
        """Parse the activity markup and remember the resource URL."""
        super().__init__(soup)
        self.url = self._get_url()
        self.parent_dir = None
        self.file = None

    def _get_url(self) -> str:
        """Return the ``href`` of the first link, or ``None`` without a link.

        The URL is also removed from ``all_urls``.
        """
        anchor = self.soup.find("a")
        if anchor is None:
            return None
        target = anchor["href"]
        if target in self.all_urls:
            self.all_urls.remove(target)
        return target

    def find(self, s: requests.Session) -> bool:
        """Resolve the file behind the resource link and store it in ``self.file``.

        If the request does not end on a ``view.php?id=`` page, the final URL
        without its query string is the file. Otherwise the page is searched:
        inside ``section#region-main`` the first link is used, then the first
        image; without that section the second ``<frame>`` is used. Returns
        ``False`` when nothing is found or parsing raises.
        """
        if self.url is None:
            return False

        response = s.get(self.url, allow_redirects=True)
        if not response.url.find("view.php?id=") > 0:
            self.file = MoodleFile(self, response.url.split("?")[0])
            return True

        try:
            page = BeautifulSoup(response.text, 'html.parser')
            region = page.find("section", {"id": "region-main"})
            if region is None:
                frames = page.find_all("frame")
                if not len(frames) > 1:
                    return False
                target = frames[1]["src"]
            else:
                anchor = region.find("a")
                if anchor is not None:
                    target = anchor["href"]
                else:
                    image = region.find("img")
                    if image is None:
                        return False
                    target = image["src"]
            self.file = MoodleFile(self, target)
            return True
        except Exception:
            return False

    def download(self, s: requests.Session, ignore_extension: list[str]) -> bool:
        """Download the file resolved by find()."""
        resolved = self.file
        return resolved.download(s, ignore_extension)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
151
MoodleCourse.py
151
MoodleCourse.py
@@ -1,151 +0,0 @@
|
|||||||
import requests
|
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
import bs4
|
|
||||||
import os
|
|
||||||
from urllib.parse import unquote
|
|
||||||
|
|
||||||
|
|
||||||
class MoodleActivity:
    """One activity entry of a Moodle course section.

    Holds the activity markup plus the name, type and URL derived from it.
    find_file() and find_folder() replace ``url`` with the actual download
    location.
    """

    def __init__(self, soup):
        """Store *soup*; name, type and url are only parsed if it contains links."""
        self.soup = soup
        self.name = None
        self.url = None
        self.type = None
        self.parent_dir = None
        self.all_urls = self._get_all_urls()
        if self.all_urls:
            self.init()

    def __repr__(self):
        return f"{self.type}: {self.name}"

    def init(self):
        """Parse name, type and URL from the markup."""
        self.name = self._get_name()
        self.type = self._get_type()
        self.url = self._get_url()

    def _get_all_urls(self):
        """Return the ``href`` of every anchor, skipping anchors without one."""
        hrefs = (anchor.get("href") for anchor in self.soup.find_all("a"))
        return [href for href in hrefs if href is not None]

    def _get_name(self):
        """Return the text of the first link, URL-decoded, at most 50 characters.

        Everything from the ``accesshide`` span onwards is dropped.
        """
        link = self.soup.find("a")
        text = link.text
        toremove = link.find("span", {"class": "accesshide"})
        if toremove is None:
            name = text.strip()
        else:
            name = text[:text.find(toremove.text)].strip()
        return unquote(name)[:50]

    def _get_url(self):
        """Return the first link's ``href`` and remove it from ``all_urls``."""
        url = self.soup.find("a")["href"]
        if url in self.all_urls:
            self.all_urls.remove(url)
        return url

    def _get_type(self):
        """Return the third CSS class of the activity element."""
        return self.soup["class"][2]

    def set_parent_dir(self, parent_dir: str):
        """Remember the directory this activity's files are stored under."""
        self.parent_dir = parent_dir

    def find_file(self, s: requests.Session) -> bool:
        """Replace ``self.url`` with the file's location; return whether one was found.

        If the request does not end on a ``view.php?id=`` page, the final URL
        without its query string is used. Otherwise the page is searched:
        inside ``section#region-main`` the first link, then the first image;
        without that section the second ``<frame>``.
        """
        if self.url is None:
            # Activities without links never had a URL assigned.
            return False
        r = s.get(self.url, allow_redirects=True)
        if "view.php?id=" not in r.url:
            self.url = r.url.split("?")[0]
            return True
        try:
            soup = BeautifulSoup(r.text, 'html.parser')
            main_region = soup.find("section", {"id": "region-main"})
            if main_region is None:
                frames = soup.find_all("frame")
                if len(frames) > 1:
                    self.url = frames[1]["src"]
                    return True
                return False
            link = main_region.find("a")
            if link is not None:
                self.url = link["href"]
                return True
            img = main_region.find("img")
            if img is not None:
                self.url = img["src"]
                return True
            return False
        except Exception:
            return False

    def find_folder(self, s: requests.Session):
        """Replace ``self.url`` with the folder download URL; return whether one was found.

        The inline form (``action`` plus ``?id=`` and the first input's
        ``value``) is tried first. If it cannot be read, the folder page is
        requested and its ``view.php`` URL is rewritten to
        ``download_folder.php``.
        """
        try:
            form = self.soup.find("form")
            url = form["action"] + "?id=" + form.find("input")["value"]
        except (AttributeError, KeyError, TypeError):
            # No usable form in the markup; fall back to the folder page.
            url = None

        if url is not None:
            if "download_folder.php" not in url:
                return False
            self.url = url
            return True

        if self.url is None:
            return False
        r = s.get(self.url, allow_redirects=True)
        if "view.php?id=" not in r.url:
            return False
        self.url = r.url.replace("view.php", "download_folder.php")
        return True
|
|
||||||
|
|
||||||
|
|
||||||
class MoodleCourseSection:
    """One section of a Moodle course page together with its activities."""

    def __init__(self, soup: bs4.element.Tag):
        """Store the section markup and parse its title and activities."""
        self.soup = soup
        self.title = self._get_title()
        self.activities = self._get_activities()

    def __repr__(self):
        return f"{self.title}: {len(self.activities)} activities"

    def _get_title(self) -> str:
        """Return the URL-decoded section heading, limited to 50 characters.

        A section without an ``<h3 class="sectionname">`` gets an empty title
        instead of raising ``AttributeError``.
        """
        heading = self.soup.find("h3", {"class": "sectionname"})
        if heading is None:
            return ""
        return unquote(heading.text.strip())[:50]

    def _get_activities(self) -> list[MoodleActivity]:
        """Wrap every ``<li class="activity">`` of the section in a MoodleActivity."""
        activities_raw = self.soup.find_all("li", {"class": "activity"})
        return [MoodleActivity(raw) for raw in activities_raw]

    def set_parent_dir(self, parent_dir: str, index: int):
        """Assign ``<parent_dir>/<index> - <title>`` to every activity."""
        section_dir = os.path.join(parent_dir, f"{index} - {self.title}")
        for act in self.activities:
            act.set_parent_dir(section_dir)
|
|
||||||
|
|
||||||
|
|
||||||
class MoodleCourse:
    """A Moodle course: its code, name, page URL and parsed sections."""

    def __init__(self, name_with_code: str, url: str):
        """Split *name_with_code* into course code (first word) and name (rest).

        Both parts are URL-decoded. ``soup`` and ``sections`` stay ``None``
        until init() is called.
        """
        self.url = url
        words = name_with_code.split()
        # An empty title yields an empty code instead of an IndexError.
        self.code = unquote(words[0]) if words else ""
        self.name = unquote(" ".join(words[1:]).strip())
        self.soup = None
        self.sections = None

    def __repr__(self):
        # sections is None before init(); report 0 rather than raising.
        return f"{self.code} {self.name}: {len(self.sections or ())} sections"

    def init(self, s: requests.Session):
        """Download the course page and parse its sections."""
        self.soup = self._get_soup(s)
        self.sections = self._get_sections()

    def _get_soup(self, s: requests.Session) -> BeautifulSoup:
        """Fetch the course page with session *s* and parse it as HTML."""
        r = s.get(self.url)
        return BeautifulSoup(r.text, 'html.parser')

    def _get_sections(self) -> list[MoodleCourseSection]:
        """Wrap every ``<li class="course-section">`` of the page in a MoodleCourseSection."""
        sections_raw = self.soup.find_all("li", {"class": "course-section"})
        return [MoodleCourseSection(raw) for raw in sections_raw]

    def set_parent_dir(self, parent_dir, year: str = None):
        """Assign ``<parent_dir>/<code> - <name>/<year>`` to every section.

        When *year* is not given it is taken from the fourth ``/``-separated
        piece of the course URL. Each section also receives its index.
        Requires init() to have been called.
        """
        if year is None:
            year = self.url.split("/")[3]
        course_dir = os.path.join(parent_dir, f"{self.code} - {self.name}", year)
        for i, sec in enumerate(self.sections):
            sec.set_parent_dir(course_dir, i)
|
|
||||||
103
main.py
103
main.py
@@ -1,9 +1,6 @@
|
|||||||
import requests
|
import requests
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from MoodleCourse import MoodleCourse
|
from MoodleClasses.MoodleCourse import MoodleCourse
|
||||||
import os
|
|
||||||
import pathvalidate
|
|
||||||
from urllib.parse import unquote
|
|
||||||
|
|
||||||
|
|
||||||
def get_moodle_auth_cookie(s: requests.Session, url: str) -> requests.Session:
|
def get_moodle_auth_cookie(s: requests.Session, url: str) -> requests.Session:
|
||||||
@@ -27,84 +24,7 @@ def get_courses(s: requests.Session, url: str) -> list[MoodleCourse]:
|
|||||||
return list(map(lambda x: MoodleCourse(x[0], x[1]), zip(course_names, course_urls)))
|
return list(map(lambda x: MoodleCourse(x[0], x[1]), zip(course_names, course_urls)))
|
||||||
|
|
||||||
|
|
||||||
def download_resource(s, activity):
|
def download_year(target_url: str) -> None:
|
||||||
directory = os.path.join(activity.parent_dir, activity.name)
|
|
||||||
directory = pathvalidate.sanitize_filepath(directory)
|
|
||||||
file = os.path.join(directory, unquote(activity.url.split('/')[-1]))
|
|
||||||
file = pathvalidate.sanitize_filepath(file)
|
|
||||||
if args.ignore_extension is not None and str(file).split(".")[-1] in args.ignore_extension:
|
|
||||||
return True
|
|
||||||
if os.path.exists(os.path.join(file)):
|
|
||||||
return True
|
|
||||||
if not os.path.exists(directory):
|
|
||||||
os.makedirs(directory)
|
|
||||||
try:
|
|
||||||
with s.get(activity.url, stream=True) as r:
|
|
||||||
with open(file+".part", 'wb') as f:
|
|
||||||
for chunk in r.iter_content(chunk_size=8192):
|
|
||||||
f.write(chunk)
|
|
||||||
os.rename(file+".part", file)
|
|
||||||
if os.path.exists(file + ".part"):
|
|
||||||
os.remove(file + ".part")
|
|
||||||
except:
|
|
||||||
if os.path.exists(file+".part"):
|
|
||||||
os.remove(file+".part")
|
|
||||||
return False
|
|
||||||
finally:
|
|
||||||
return True
|
|
||||||
|
|
||||||
|
|
||||||
def download_folder(s, activity):
|
|
||||||
directory = os.path.join(activity.parent_dir, activity.name)
|
|
||||||
directory = pathvalidate.sanitize_filepath(directory)
|
|
||||||
try:
|
|
||||||
with s.get(activity.url, stream=True) as r:
|
|
||||||
file = os.path.join(directory, unquote(r.headers['Content-Disposition'].split('\'')[-1]))
|
|
||||||
file = pathvalidate.sanitize_filepath(file)
|
|
||||||
if os.path.exists(os.path.join(file)):
|
|
||||||
return True
|
|
||||||
if not os.path.exists(directory):
|
|
||||||
os.makedirs(directory)
|
|
||||||
with open(file + ".part", 'wb') as f:
|
|
||||||
for chunk in r.iter_content(chunk_size=8192):
|
|
||||||
f.write(chunk)
|
|
||||||
os.rename(file + ".part", file)
|
|
||||||
if os.path.exists(file + ".part"):
|
|
||||||
os.remove(file + ".part")
|
|
||||||
except:
|
|
||||||
if os.path.exists(file+".part"):
|
|
||||||
os.remove(file+".part")
|
|
||||||
return False
|
|
||||||
finally:
|
|
||||||
return True
|
|
||||||
|
|
||||||
|
|
||||||
def download_assign(s, activity):
|
|
||||||
r = s.get(activity.url)
|
|
||||||
soup = BeautifulSoup(r.text, 'html.parser')
|
|
||||||
for candidate in soup.find_all("div", {"class": "fileuploadsubmission"}):
|
|
||||||
link = candidate.find("a")
|
|
||||||
url = link["href"].split('?')[0]
|
|
||||||
with s.get(url, stream=True) as r:
|
|
||||||
directory = os.path.join(activity.parent_dir, activity.name)
|
|
||||||
directory = pathvalidate.sanitize_filepath(directory)
|
|
||||||
file = os.path.join(directory, unquote(link.text))
|
|
||||||
file = pathvalidate.sanitize_filepath(file)
|
|
||||||
if not os.path.exists(directory):
|
|
||||||
os.makedirs(directory)
|
|
||||||
if os.path.exists(os.path.join(file)):
|
|
||||||
return True
|
|
||||||
with open(file + ".part", 'wb') as f:
|
|
||||||
for chunk in r.iter_content(chunk_size=8192):
|
|
||||||
f.write(chunk)
|
|
||||||
os.rename(file + ".part", file)
|
|
||||||
if os.path.exists(file + ".part"):
|
|
||||||
os.remove(file + ".part")
|
|
||||||
return True
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def download_year(target_url: str):
|
|
||||||
s = requests.Session()
|
s = requests.Session()
|
||||||
s = get_moodle_auth_cookie(s, target_url)
|
s = get_moodle_auth_cookie(s, target_url)
|
||||||
courses = get_courses(s, target_url)
|
courses = get_courses(s, target_url)
|
||||||
@@ -114,20 +34,11 @@ def download_year(target_url: str):
|
|||||||
course.set_parent_dir(args.storage_dir, args.academic_year)
|
course.set_parent_dir(args.storage_dir, args.academic_year)
|
||||||
for section in course.sections:
|
for section in course.sections:
|
||||||
for activity in section.activities:
|
for activity in section.activities:
|
||||||
if activity.type == "resource":
|
if activity.find(s):
|
||||||
if activity.find_file(s):
|
if not activity.download(s, args.ignore_extension):
|
||||||
if not download_resource(s, activity):
|
print(f"Could not download: {activity}")
|
||||||
print("Could not download: ", activity.url)
|
else:
|
||||||
else:
|
print(f"Could not find: {activity}")
|
||||||
print("Could not find: ", activity.url)
|
|
||||||
elif activity.type == "folder":
|
|
||||||
if activity.find_folder(s):
|
|
||||||
if not download_folder(s, activity):
|
|
||||||
print("Could not download: ", activity.url)
|
|
||||||
else:
|
|
||||||
print("Could not find: ", activity.url)
|
|
||||||
elif activity.type == "assign":
|
|
||||||
download_assign(s, activity)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
Reference in New Issue
Block a user