|
@@ -0,0 +1,209 @@
|
|
|
+#!/usr/bin/env python
|
|
|
+# -*- encoding: utf-8 -*-
|
|
|
+'''
|
|
|
+@Author : liuyuqi
|
|
|
+@Contact : liuyuqi.gov@msn.cn
|
|
|
+@Time : 2020/05/03 19:10:15
|
|
|
+@Version : 1.0
|
|
|
+@License : Copyright ? 2017-2020 liuyuqi. All Rights Reserved.
|
|
|
+@Desc : teaching.applysquare.com
|
|
|
+'''
|
|
|
+
|
|
|
+import json
|
|
|
+import logging
|
|
|
+import os
|
|
|
+import re
|
|
|
+import time
|
|
|
+from contextlib import closing
|
|
|
+
|
|
|
+import requests
|
|
|
+from selenium import webdriver
|
|
|
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
|
|
|
+
|
|
|
+# Function dealing with illegal characters of windows filename
|
|
|
+def filename_filter(name:str):
|
|
|
+ illegal_list = list('/\:*?”"<>|')
|
|
|
+ for char in illegal_list:
|
|
|
+ name = name.replace(char, ' ')
|
|
|
+ return name
|
|
|
+
|
|
|
+def construct_attchment_list(driver, token, pid, uid, cid):
|
|
|
+ attachment_list = list()
|
|
|
+ attachment_info_url = attachment_url_fmt.format(token, pid, 1, uid, cid)
|
|
|
+ driver.get(attachment_info_url)
|
|
|
+ raw_info = re.search(r'\{.*\}', driver.page_source).group(0)
|
|
|
+ info = json.loads(raw_info).get('message')
|
|
|
+ file_num = info.get('count')
|
|
|
+
|
|
|
+ current_page = 1
|
|
|
+ # Add attachment path to attachment_list
|
|
|
+ while len(attachment_list) < file_num:
|
|
|
+ current_url = attachment_url_fmt.format(token, pid, current_page, uid, cid)
|
|
|
+ driver.get(current_url)
|
|
|
+ raw_info = re.search(r'\{.*\}', driver.page_source).group(0)
|
|
|
+ info = json.loads(raw_info).get('message')
|
|
|
+ attachment_list.extend(info.get('list'))
|
|
|
+ current_page += 1
|
|
|
+ return attachment_list
|
|
|
+
|
|
|
+# Load config from config.json
|
|
|
+with open('config.json', 'r') as f:
|
|
|
+ config = json.loads(f.read())
|
|
|
+
|
|
|
+user_name = config.get('username')
|
|
|
+user_passwd = config.get('password')
|
|
|
+headless_mode = config.get('headless_mode')
|
|
|
+download_all_ext = config.get('download_all_ext')
|
|
|
+download_all_courses = config.get('download_all_courses')
|
|
|
+ext_list = config.get('ext_list')
|
|
|
+ext_expel_list = config.get('ext_expel_list')
|
|
|
+cid_list = config.get('cid_list')
|
|
|
+
|
|
|
+# auto_restart = True
|
|
|
+# speed_threshold = 50 * 1024
|
|
|
+
|
|
|
+# Some metadata
|
|
|
+login_url = r"https://teaching.applysquare.com/Home/User/login"
|
|
|
+attachment_url_fmt = r'https://teaching.applysquare.com/Api/CourseAttachment/getList/token/{}?parent_id={}&page={}&plan_id=-1&uid={}&cid={}'
|
|
|
+course_info_url_fmt = r'https://teaching.applysquare.com/Api/Public/getIndexCourseList/token/{}?type=1&usertype=1&uid={}'
|
|
|
+token_pattern = r'(https://teaching\.applysquare\.com/Api/Public/getIndexCourseList/token/.*?)"'
|
|
|
+
|
|
|
+# Start the webdriver
|
|
|
+caps = DesiredCapabilities.CHROME
|
|
|
+caps['loggingPrefs'] = {'performance': 'ALL'}
|
|
|
+opt = webdriver.ChromeOptions()
|
|
|
+opt.add_experimental_option('w3c', False)
|
|
|
+opt.add_argument('log-level=3')
|
|
|
+if headless_mode:
|
|
|
+ opt.add_argument("--headless")
|
|
|
+driver = webdriver.Chrome(options=opt, desired_capabilities=caps)
|
|
|
+
|
|
|
+# Login to Pedagogy Square
|
|
|
+driver.get(login_url)
|
|
|
+time.sleep(1)
|
|
|
+
|
|
|
+driver.find_element_by_xpath(r"/html/body/div[2]/div/div[2]/div/div/div/div/div[2]/div/div/div[1]/input").send_keys(user_name) # Send username
|
|
|
+driver.find_element_by_xpath(r'//*[@id="id_login_password"]').send_keys(user_passwd) # Send password
|
|
|
+driver.find_element_by_xpath(r'//*[@id="id_login_button"]').click() # Submit
|
|
|
+time.sleep(0.5)
|
|
|
+
|
|
|
+# Dealing with student-teacher selection
|
|
|
+try:
|
|
|
+ driver.find_element_by_xpath(r'/html/body/div[2]/div/div[2]/div/div/div[1]/div[2]/div[2]/div[1]/i').click() # Choose student
|
|
|
+ driver.find_element_by_xpath(r'/html/body/div[2]/div/div[2]/div/div/div[1]/div[4]/a').click() # Submit
|
|
|
+except Exception:
|
|
|
+ pass
|
|
|
+
|
|
|
+time.sleep(0.5)
|
|
|
+if (driver.current_url == r'https://teaching.applysquare.com/S/Index/index'):
|
|
|
+ print("Login Successfully!")
|
|
|
+else:
|
|
|
+ print("Login Error --- Please check your username & password")
|
|
|
+ print("Disable headless mode for detailed information")
|
|
|
+
|
|
|
+# Get token for authorization
|
|
|
+token = None
|
|
|
+while not token:
|
|
|
+ for entry in driver.get_log('performance'):
|
|
|
+ match_obj = re.search(token_pattern, entry.get('message'))
|
|
|
+ if match_obj:
|
|
|
+ temp_url = match_obj.group(1)
|
|
|
+ token = re.search(r'token/(.*?)\?', temp_url).group(1)
|
|
|
+ uid = re.search(r'uid=(.*?)', temp_url).group(1)
|
|
|
+ break
|
|
|
+
|
|
|
+cid2name_dict = dict()
|
|
|
+course_info_url = course_info_url_fmt.format(token, uid)
|
|
|
+driver.get(course_info_url)
|
|
|
+raw_info = re.search(r'\{.*\}', driver.page_source).group(0)
|
|
|
+info = json.loads(raw_info).get('message')
|
|
|
+for entry in info:
|
|
|
+ cid2name_dict[entry.get('cid')] = entry.get('name')
|
|
|
+
|
|
|
+if download_all_courses:
|
|
|
+ cid_list = cid2name_dict.keys()
|
|
|
+
|
|
|
+for cid in cid_list:
|
|
|
+ cid = str(cid) # Prevent bug caused by wrong type of cid
|
|
|
+ course_name = filename_filter(cid2name_dict[cid])
|
|
|
+ print("\nDownloading files of course {}".format(course_name))
|
|
|
+
|
|
|
+ # Create dir for this course
|
|
|
+ try:
|
|
|
+ os.chdir("./{}".format(course_name))
|
|
|
+ except FileNotFoundError:
|
|
|
+ os.mkdir("{}".format(course_name))
|
|
|
+ os.chdir("./{}".format(course_name))
|
|
|
+
|
|
|
+ # Construct attachment list, with some dirs in it
|
|
|
+ course_attachment_list = construct_attchment_list(driver=driver, token=token, pid=0, uid=uid, cid=cid)
|
|
|
+
|
|
|
+ # Iteratively add files in dirs to global attachment list
|
|
|
+ dir_counter = 0
|
|
|
+ for entry in course_attachment_list:
|
|
|
+ if (entry.get('ext') == 'dir'):
|
|
|
+ dir_counter += 1
|
|
|
+ # Add dir content to attachment list
|
|
|
+ dir_id = entry.get('id')
|
|
|
+ course_attachment_list.extend(construct_attchment_list(driver=driver, token=token, pid=dir_id, uid=uid, cid=cid))
|
|
|
+
|
|
|
+ print("Get {:d} files, with {:d} dirs".format(len(course_attachment_list)-dir_counter, dir_counter))
|
|
|
+
|
|
|
+ # Download attachments
|
|
|
+ for entry in course_attachment_list:
|
|
|
+ ext = entry.get('ext')
|
|
|
+ if (ext == 'dir') or (ext in ext_expel_list) or (not download_all_ext and ext not in ext_list):
|
|
|
+ continue
|
|
|
+
|
|
|
+ if (ext in entry.get('title')):
|
|
|
+ filename = filename_filter(entry.get('title'))
|
|
|
+ else:
|
|
|
+ filename = filename_filter("{}.{}".format(entry.get('title'), ext))
|
|
|
+
|
|
|
+ filesize = entry.get('size')
|
|
|
+
|
|
|
+ with closing(requests.get(entry.get('path').replace('amp;', ''), stream=True)) as res:
|
|
|
+ content_size = eval(res.headers['content-length'])
|
|
|
+
|
|
|
+ if filename in os.listdir():
|
|
|
+ # If file is up-to date, continue; else, delete and re-download
|
|
|
+ if os.path.getsize(filename) == content_size:
|
|
|
+ print("File \"{}\" is up-to-date".format(filename))
|
|
|
+ continue
|
|
|
+ else:
|
|
|
+ print("Updating File {}".format(filename))
|
|
|
+ os.remove(filename)
|
|
|
+
|
|
|
+ print("Downloading {}, filesize = {}".format(filename, filesize))
|
|
|
+ chunk_size = min(content_size, 10240)
|
|
|
+ with open(filename, "wb") as f:
|
|
|
+ chunk_count = 0
|
|
|
+ start_time = time.time()
|
|
|
+ # previous_time = time.time()
|
|
|
+ # lag_counter = 0
|
|
|
+ total = content_size / 1024 / 1024
|
|
|
+ for data in res.iter_content(chunk_size=chunk_size):
|
|
|
+ chunk_count += 1
|
|
|
+ processed = len(data) * chunk_count / 1024 / 1024
|
|
|
+ current_time = time.time()
|
|
|
+ if chunk_count < 5:
|
|
|
+ print(r" Total: {:.2f} MB Processed: {:.2f} MB ({:.2f}%)".format(total, processed, processed/total*100), end = '\r')
|
|
|
+ else:
|
|
|
+ remaining = (current_time-start_time)/processed*(total-processed)
|
|
|
+ print(r" Total: {:.2f} MB Processed: {:.2f} MB ({:.2f}%), ETA {:.2f}s".format(total, processed, processed/total*100, remaining), end = '\r')
|
|
|
+ f.write(data)
|
|
|
+
|
|
|
+ # speed = chunk_size / 1.0 * (current_time - previous_time)
|
|
|
+ # if speed < speed_threshold:
|
|
|
+ # lag_counter += 1
|
|
|
+ # else:
|
|
|
+ # lag_counter = 0
|
|
|
+
|
|
|
+ # if lag_counter > 10:
|
|
|
+ # print("Restart downloading of file {}".format(filename))
|
|
|
+ # attachment_list.append(entry)
|
|
|
+ # continue
|
|
|
+
|
|
|
+ os.chdir(r'../') # Switch directory
|
|
|
+
|
|
|
+print("Done!")
|