中国海洋大学新版教务系统爬虫

最近选课开始,咱们海大也是上线了花了大钱搞的新版选课系统,我们来简单看一下新版选课系统是否有之前的漏洞

教务系统官网

首先是教务系统官网:http://jwgl2024.ouc.edu.cn/

点进去就会发现其实没有写完

image-20250605225430166

我们抓包查看一下教务系统的登录接口加密:

image-20250605225632267

发现参数中有一个 encoded,显然是加密后的密码,于是在 js 中寻找 encoded 是如何生成的。

找到结果如下:

// Login handler quoted from the portal page: checks that both fields are
// filled, POSTs to strUrl to obtain a "scode#sxh" string, interleaves scode
// fragments into the credential string, and submits the result through the
// "encoded" form field.
function login() {
// Refuse to continue while the account or password field is empty.
if ($("#userAccount").val() == "") {
$("#showMsg").text("请输入账号");
$("#userAccount").focus();
return false;
}
if ($("#userPassword").val() == "") {
$("#showMsg").text("请输入密码");
$("#userPassword").focus();
return false;
}


var strUrl = "/jsxsd/Logon.do?method=logon&flag=sess";
var codeDogSequence = '';
// Two different string literals are compared, so this branch never runs and
// codeDogSequence stays ''. (Presumably a server-rendered switch for the
// hardware-dongle feature — TODO confirm.)
if ('false' === 'true') {
try {
var res = GetSerialNumber();
if (res.code === 0) {
codeDogSequence = res.data.result;
} else if (res.code === 9001) {
// dongle device is not authenticated
}
} catch (e) { }
}

$.ajax({
url: strUrl,
type: "post",
cache: false,
dataType: "text",
success: function (dataStr) {
// A literal "no" response aborts the login without submitting the form.
if (dataStr == "no") {
return false;
} else {
// The response is split on "#": first part is scode, second part is sxh.
var scode = dataStr.split("#")[0];
var sxh = dataStr.split("#")[1];

// Plaintext layout: account %%% password %%% dongle serial.
var code = document.getElementById("userAccount").value + "%%%" + document.getElementById("userPassword").value + "%%%" + codeDogSequence;
var encoded = "";
for (var i = 0; i < code.length; i++) {
if (i < 55) {
// Emit the i-th plaintext character followed by the next N characters of
// scode, where N is the i-th digit of sxh; then drop those N characters
// from the front of scode.
encoded = encoded + code.substring(i, i + 1) + scode.substring(0, parseInt(sxh.substring(i, i + 1)));
scode = scode.substring(parseInt(sxh.substring(i, i + 1)), scode.length);
} else {
// From index 55 on, the rest of the plaintext is appended unchanged and the
// loop is ended by moving i to code.length.
encoded = encoded + code.substring(i, code.length);
i = code.length;
}
}
document.getElementById("encoded").value = encoded;
// The two literals differ, so the password field is always blanked before
// the form is submitted.
if ("LoginToXk" != "logonLdap") {
document.getElementById("userPassword").value = "";
}
document.getElementById("loginForm").submit();
}
},
error: function () {
window.qzAlert("warning", "计算异常!");
}
});
}

发现 encoded 是把账号、密码、codeDogSequence 用 %%% 拼接后,再按 sxh 每一位指定的长度依次穿插 scode 的片段生成的;scode、sxh 通过请求 /jsxsd/Logon.do?method=logon&flag=sess 获取。

整体逻辑到这里就很清晰了

整体代码如下:

import requests

def get_encoded(account, password, scode, sxh, codeDogSequence=' '):
    """Build the ``encoded`` login field the way the portal's ``login()`` script does.

    The plaintext ``account%%%password%%%codeDogSequence`` is walked character
    by character. For each of the first 55 characters the character itself is
    emitted, followed by the next ``n`` characters of ``scode``, where ``n`` is
    the digit found at the same index of ``sxh``. Everything from index 55 on
    is appended unchanged.

    Args:
        account: Login account (plaintext).
        password: Login password (plaintext).
        scode: Filler string supplied by the server.
        sxh: Digit string; digit ``i`` says how many ``scode`` characters
            follow plaintext character ``i``.
        codeDogSequence: Hardware-dongle serial appended after the password.
            NOTE(review): the quoted ``login()`` script initialises this to
            ``''`` while the default here is a single space — confirm which
            one the server expects.

    Returns:
        The interleaved string to send as the ``encoded`` form field.
    """
    limit = 55  # the front-end only interleaves the first 55 characters
    code = f"{account}%%%{password}%%%{codeDogSequence}"

    pieces = []
    for i, ch in enumerate(code[:limit]):
        # A missing or non-digit position consumes nothing, matching the
        # front-end where parseInt() yields NaN and substring() treats NaN as 0
        # (the previous int(sxh[i]) raised IndexError/ValueError instead).
        digit = sxh[i:i + 1]
        n = int(digit) if digit and digit in "0123456789" else 0
        pieces.append(ch + scode[:n])
        scode = scode[n:]

    pieces.append(code[limit:])
    return "".join(pieces)

def get_scode_and_sxh(base_url, session=None, timeout=10):
    """Request the ``scode``/``sxh`` pair used to build the ``encoded`` field.

    Args:
        base_url: Site root, e.g. ``http://jwgl2024.ouc.edu.cn``.
        session: Optional ``requests.Session``. Pass the session used for the
            rest of the login so that cookies are shared; a fresh one is
            created when omitted.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot block forever.

    Returns:
        ``(scode, sxh)`` — the response body split at its first ``#``.

    Raises:
        RuntimeError: The server answered with a status other than 200.
        ValueError: The response body contains no ``#`` separator.
    """
    # NOTE(review): the quoted login() script posts to
    # "/jsxsd/Logon.do?method=logon&flag=sess"; this path has no "/jsxsd"
    # prefix — confirm against the live site.
    url = f"{base_url}/Logon.do?method=logon&flag=sess"
    sess = session or requests.Session()
    headers = {
        "Accept": "text/plain, */*; q=0.01",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Content-Length": "0",
        "Origin": base_url,
        "Pragma": "no-cache",
        "Referer": f"{base_url}/",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
        "X-Requested-With": "XMLHttpRequest",
    }

    resp = sess.post(url, headers=headers, timeout=timeout)
    if resp.status_code != 200:
        raise RuntimeError(f"请求失败,状态码: {resp.status_code}")

    # Split at the first '#' only; anything after it belongs to sxh.
    scode, separator, sxh = resp.text.partition("#")
    if not separator:
        raise ValueError("返回内容格式不正确")
    return scode, sxh

def login_jwgl(base_url, account, encoded, session=None, timeout=10):
    """Submit the login form of the main academic-affairs portal.

    Args:
        base_url: Site root, e.g. ``http://jwgl2024.ouc.edu.cn``.
        account: User account, sent in clear as ``userAccount``.
        encoded: Value for the ``encoded`` form field (the interleaved
            credential string).
        session: Optional ``requests.Session``; a fresh one is created when
            omitted.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response object of the login POST.
    """
    url = f"{base_url}/Logon.do?method=logon"
    sess = session or requests.Session()
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Content-Type": "application/x-www-form-urlencoded",
        "Origin": base_url,
        "Pragma": "no-cache",
        "Referer": f"{base_url}/",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
    }
    # The password field is sent empty on purpose: the front-end blanks it
    # before submitting, so only ``encoded`` carries the password.
    form = {
        "loginMethod": "logon",
        "userlanguage": "0",
        "userAccount": account,
        "userPassword": "",
        "encoded": encoded,
    }

    resp = sess.post(url, headers=headers, data=form, timeout=timeout)
    # Shows only the cookies set by this response, not the whole session jar.
    print("Cookies after login:", resp.cookies.get_dict())
    return resp

# Example usage: log in to the main portal and save the landing page.
if __name__ == "__main__":
    account = ""  # student ID
    password = ""  # password
    base_url = "http://jwgl2024.ouc.edu.cn"

    # 1. Visit the home page first so the session receives its initial cookie;
    #    the response body itself is not needed.
    session = requests.Session()
    session.get(base_url + "/")

    # 2. Fetch scode and sxh with the same session.
    scode, sxh = get_scode_and_sxh(base_url, session=session)

    # 3. Dongle serial: a single space when no hardware key is used.
    #    Add the lookup here if UKey support is ever needed.
    codeDogSequence = ' '
    encoded = get_encoded(account, password, scode, sxh, codeDogSequence)
    # NOTE: ``encoded`` contains the password in recoverable form — do not
    # paste this output anywhere public.
    print("Encoded Input:", encoded)

    # 4. Log in with the same session.
    resp = login_jwgl(base_url, account, encoded, session=session)
    print("Login response status:", resp.status_code)
    print("当前会话所有cookie:", session.cookies.get_dict())

    # Keep the returned page for inspection.
    with open("login_result.html", "w", encoding="utf-8") as f:
        f.write(resp.text)
    print("已保存到 login_result.html")

选课系统官网

选课系统是教务系统后面加上相对路径:/jsxsd

本以为和教务系统官网认证方式一样,看了代码发现有些差异:

// Login handler quoted from the course-selection page: passes account,
// password and dongle serial through encodeInp, interleaves scode fragments
// into the joined string, and submits it through the "encoded" form field.
function submitForm1() {
try {
var xh = document.getElementById("userAccount").value;
var pwd = document.getElementById("userPassword").value;
// Refuse to continue while the account or password field is empty.
if (xh == "") {
window.qzAlert("warning", "用户名不能为空!");
return false;
}
if (pwd == "") {
window.qzAlert("warning", "密码不能为空!");
return false;
}

// encodeInp is defined outside this snippet; the accompanying write-up
// identifies it as Base64 encoding.
var account = encodeInp(xh);

var passwd = encodeInp(pwd);

// Defaults to a single space; replaced by the dongle serial only when
// GetSerialNumber() succeeds with code 0. Any exception is swallowed.
var codeDogSequence = ' ';
try {
var res = GetSerialNumber();
if (res.code === 0) {
codeDogSequence = res.data.result;
} else if (res.code === 9001) {
// dongle device is not authenticated
}
} catch (e) { }

codeDogSequence = encodeInp(codeDogSequence);

// Layout: encodeInp(account) %%% encodeInp(password) %%% encodeInp(serial).
var code = account + "%%%" + passwd + "%%%" + codeDogSequence;
// scode and sxh appear here as string literals inside the page itself, not as
// the result of a separate request.
var scode = "QN929X4k0nBGIFeq6246Af1aef5qmj9jS1Ly0b7S651o628M29i9bA5N047uD2uI37a87k4e1tA8674llKdjv9n3d39afxego4A02M89ZnO38f305nr";
var sxh = "2311312233213321312123132323311133323211132123221331323";
var encoded = "";
for (var i = 0; i < code.length; i++) {
if (i < 55) {
// Emit the i-th character followed by the next N characters of scode, where
// N is the i-th digit of sxh; then drop those N characters from scode.
encoded = encoded + code.substring(i, i + 1) + scode.substring(0, parseInt(sxh.substring(i, i + 1)));
scode = scode.substring(parseInt(sxh.substring(i, i + 1)), scode.length);
} else {
// From index 55 on, the rest is appended unchanged and the loop is ended by
// moving i to code.length.
encoded = encoded + code.substring(i, code.length);
i = code.length;
}
}
document.getElementById("encoded").value = encoded;
// var jzmmid = document.getElementById("Form1").jzmmid;
// alert('=====sss=');
// The two literals differ, so the password field is always blanked before
// the form is submitted.
if ("LoginToXk" != "logonLdap") {
document.getElementById("userPassword").value = "";
}

// sessionStorage is cleared and then only the selected language is stored.
var lang = document.getElementById("userlanguage").value;
sessionStorage.clear();
sessionStorage.setItem('language', lang);
document.loginForm.submit();
} catch (e) {
alert(e);
return false;
}
}

还是那几个变量,但是这里先用一个名为 encodeInp 的函数对变量做了一次处理,追溯之后发现这个函数其实就是 base64 编码,也就是说只是多了一层 base64 编码。尝试按照上面的写法却发现访问不通,细看之后发现 scode、sxh 的来源也和之前不一样:之前是通过单独的接口请求获取,这次是在请求整个页面时就直接把变量的值写在了页面里,因此我们通过正则表达式进行提取。

代码如下:

def encodeInp(input_str):
    """Base64-encode ``input_str`` and return the result as text.

    Python counterpart of the page's ``encodeInp`` helper.

    Args:
        input_str: Text (encoded as UTF-8 first) or a bytes-like object
            (used as-is).

    Returns:
        The Base64 representation as a ``str``.
    """
    # Local import: this snippet is published without an import header, so
    # ``base64`` would otherwise be undefined.
    import base64

    raw = input_str.encode('utf-8') if isinstance(input_str, str) else input_str
    return base64.b64encode(raw).decode('utf-8')

def get_encoded(account, password, codeDogSequence, scode, sxh):
    """Build the ``encoded`` field for the course-selection login.

    The three inputs are joined as ``account%%%password%%%codeDogSequence``
    and walked character by character. For each of the first 55 characters
    the character itself is emitted, followed by the next ``n`` characters of
    ``scode``, where ``n`` is the digit at the same index of ``sxh``.
    Everything from index 55 on is appended unchanged.

    Args:
        account: Account, already passed through ``encodeInp``.
        password: Password, already passed through ``encodeInp``.
        codeDogSequence: Dongle serial, already passed through ``encodeInp``.
        scode: Filler string taken from the login page.
        sxh: Digit string; digit ``i`` says how many ``scode`` characters
            follow character ``i`` of the joined string.

    Returns:
        The interleaved string to send as the ``encoded`` form field.
    """
    limit = 55  # the front-end only interleaves the first 55 characters
    code = f"{account}%%%{password}%%%{codeDogSequence}"

    pieces = []
    for i, ch in enumerate(code[:limit]):
        # A missing or non-digit position consumes nothing, matching the
        # front-end where parseInt() yields NaN and substring() treats NaN as 0
        # (the previous int(sxh[i]) raised IndexError/ValueError instead).
        digit = sxh[i:i + 1]
        n = int(digit) if digit and digit in "0123456789" else 0
        pieces.append(ch + scode[:n])
        scode = scode[n:]

    pieces.append(code[limit:])
    return "".join(pieces)

def extract_scode_sxh_from_html(html):
    """Pull the static ``scode`` and ``sxh`` string literals out of the login page.

    Only assignments of the form ``var scode = "...";`` / ``var sxh = "...";``
    are considered. A candidate is skipped when ``split("#")`` (or
    ``.split('#')``) appears within 20 characters on either side of it, since
    that suggests the value is being derived dynamically nearby; the search
    then moves on to the next candidate instead of giving up.

    Args:
        html: Source of the login page.

    Returns:
        ``(scode, sxh)`` as found in the page.

    Raises:
        ValueError: No acceptable static assignment was found for one of the
            two variables.
    """
    # Local import: this snippet is published without an import header, so
    # ``re`` would otherwise be undefined.
    import re

    def first_static(pattern):
        # Return the first literal whose surroundings look static, else None.
        for hit in re.finditer(pattern, html):
            window = html[max(0, hit.start() - 20):hit.end() + 20]
            if 'split("#")' not in window and ".split('#')" not in window:
                return hit.group(1)
        return None

    scode = first_static(r'var scode\s*=\s*"([^"]+)";')
    sxh = first_static(r'var sxh\s*=\s*"([^"]+)";')
    if scode is None or sxh is None:
        raise ValueError("未找到静态 scode 或 sxh,请检查页面结构")
    return scode, sxh

def login_xk(base_url, account, password, session=None, timeout=10):
    """Log in to the course-selection system under ``/jsxsd``.

    Steps: fetch the login page, read ``scode``/``sxh`` from it, Base64-encode
    account, password and dongle serial, interleave them into ``encoded`` and
    POST the login form. The returned page is also written to
    ``login_xk_result.html`` in the working directory.

    Args:
        base_url: Site root, e.g. ``http://jwgl2024.ouc.edu.cn``.
        account: Plaintext account (student ID).
        password: Plaintext password.
        session: Optional ``requests.Session``; a fresh one is created when
            omitted. Reuse it afterwards to stay logged in.
        timeout: Seconds to wait for each request before giving up.

    Returns:
        The response object of the login POST.
    """
    sess = session or requests.Session()

    # 1. Fetch the login page and extract scode / sxh from its script.
    page = sess.get(base_url + "/jsxsd/", timeout=timeout)
    scode, sxh = extract_scode_sxh_from_html(page.text)
    print("scode:", scode)
    print("sxh:", sxh)

    # 2. Base64-encode each part; the dongle serial is a single space when no
    #    hardware key is present, as in the page script.
    account_enc = encodeInp(account)
    password_enc = encodeInp(password)
    codeDogSequence_enc = encodeInp(' ')

    # 3. Interleave into the ``encoded`` field.
    encoded = get_encoded(account_enc, password_enc, codeDogSequence_enc, scode, sxh)

    # 4. Submit the login form.
    url = f"{base_url}/jsxsd/xk/LoginToXk"
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Content-Type": "application/x-www-form-urlencoded",
        "Origin": base_url,
        "Pragma": "no-cache",
        "Referer": f"{base_url}/jsxsd/",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
    }
    # The password field is sent empty; only ``encoded`` carries it.
    form = {
        "loginMethod": "LoginToXk",
        "userlanguage": "0",
        "userAccount": account,
        "userPassword": "",
        "encoded": encoded,
    }
    resp = sess.post(url, headers=headers, data=form, timeout=timeout)

    print("Login response status:", resp.status_code)
    print('设置的cookie:', resp.cookies.get_dict())
    print("当前会话所有cookie:", sess.cookies.get_dict())
    with open("login_xk_result.html", "w", encoding="utf-8") as f:
        f.write(resp.text)
    print("已保存到 login_xk_result.html")
    return resp

到这里其实登录已经成功,但是我们想爬取一下自己的选课记录,由此看看有没有之前的漏洞

image-20250605230947957

发现这是一个get请求,那么到底是怎么识别身份的呢?我们看了一下cookie,发现cookie从访问首页到登录成功,再到这一步一直没有变化,说明认证逻辑是:

首页服务器发送随机cookie,登录成功之后将用户与cookie进行绑定

那么我便尝试使用脚本登录之后,再使用cookie访问这个get请求,却收到了

[!WARNING]

当前账号已在别处登录,请重新登录进入选课!

这样的警告。

说明一定是某个环节出现了问题,重新看一下整个流程,发现:

image-20250605231647734

选课之前有一个进入选课,这个button也是一个发送请求的按键

image-20250605231801092

在进入选课之前需要先访问一个创建选课的接口,然后才可以访问选课的url,有可能是出于某些性能方面的策略才这样设计的,通过下面的cookie字段也可以发现

name value
SERVERID Jw4

value从jw3到jw6不断变化,有可能jw1和jw2也有,这也许是多个服务器之间进行负载均衡了。

至此选课系统的爬虫告一段落。获取选课记录的函数如下:

def get_xsxk_tzsm(base_url, session=None,
                  jx0502zbid='3CB5600F91374DE3895EF44E942970C0', timeout=10):
    """Fetch the student course-selection notice page (``xsxk_tzsm``).

    Two requests are made with the same session: first ``newXsxkzx`` (the
    "enter course selection" step), then ``xsxk_tzsm``. Both response bodies
    are written to HTML files in the working directory.

    Args:
        base_url: Site root, e.g. ``http://jwgl2024.ouc.edu.cn``.
        session: A logged-in ``requests.Session``; a fresh (unauthenticated)
            one is created when omitted.
        jx0502zbid: Identifier sent as the ``jx0502zbid`` query parameter and
            in the second request's Referer. Defaults to the value that was
            previously hard-coded. Presumably it identifies the selection
            round — confirm against the site.
        timeout: Seconds to wait for each request before giving up.

    Returns:
        The response object of the ``xsxk_tzsm`` request.
    """
    sess = session or requests.Session()

    # Headers shared by both requests. No explicit Host header: it is derived
    # from the URL, so it always matches ``base_url``.
    common_headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Pragma': 'no-cache',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36',
    }

    # 1. Enter course selection. The article reports "account logged in
    #    elsewhere" on the next request when this step is left out.
    new_resp = sess.get(
        f"{base_url}/jsxsd/xsxk/newXsxkzx",
        params={'jx0502zbid': jx0502zbid, 'isallsc': ''},
        headers={
            **common_headers,
            'Referer': f"{base_url}/jsxsd/xsxk/xklc_list?isallsc=",
        },
        timeout=timeout,
    )
    print(f"NewXsxkzx Status: {new_resp.status_code}")
    with open("new_xsxkzx_result.html", "w", encoding="utf-8") as f:
        f.write(new_resp.text)
    print("NewXsxkzx Response saved to new_xsxkzx_result.html")

    # 2. Fetch the notice page itself.
    response = sess.get(
        f"{base_url}/jsxsd/xsxk/xsxk_tzsm",
        headers={
            **common_headers,
            'Referer': f"{base_url}/jsxsd/xsxk/selectBottom?jx0502zbid={jx0502zbid}&sfylxkstr=",
        },
        timeout=timeout,
    )
    print(f"Status Code: {response.status_code}")
    with open("xsxk_tzsm_result.html", "w", encoding="utf-8") as f:
        f.write(response.text)
    print("已保存到 xsxk_tzsm_result.html")

    return response

总而言之,系统做的还行,我还没有发现什么漏洞,当然我也发现不了太高明的漏洞,但感觉不值传言中花费的300w。