文章摘要
GPT 4
此内容根据文章生成,并经过人工审核,仅用于文章内容的解释与总结
投诉

维吉尼亚密码破解(Python 实现)

具体原理可以自行查看文末的参考文章。

步骤

1. 获取key的可能长度(使用重合指数攻击法)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
def count_IC(cipher):
    """Return the index of coincidence (IC) of ``cipher``.

    The IC is the probability that two characters drawn at random, without
    replacement, from ``cipher`` are equal. It is computed as
    ``sum(f * (f - 1)) / (L * (L - 1))``, where ``f`` runs over the
    per-character counts and ``L`` is the length of the text.

    Args:
        cipher: The text to measure. Every character is counted as-is.

    Returns:
        The IC as a float, or ``0.0`` when ``cipher`` has fewer than two
        characters (no pair can be drawn, and the formula would otherwise
        divide by zero).
    """
    length = len(cipher)
    if length < 2:
        return 0.0
    freq = Counter(cipher)
    return sum(f * (f - 1) for f in freq.values()) / (length * (length - 1))
def guess_key_length(cipher, max_len=50):
    """Rank candidate key lengths using the index of coincidence.

    For every candidate length the text is split into that many interleaved
    columns (column ``i`` holds every ``key_len``-th character starting at
    offset ``i``). The columns' ICs are averaged and compared with 0.0667,
    the approximate IC of English text; the closer the average, the better
    the candidate.

    Args:
        cipher: The ciphertext to analyse.
        max_len: Largest key length to try (inclusive).

    Returns:
        Key lengths ordered from best to worst match. Lengths that would
        leave a column with fewer than two characters are not considered,
        so the list may be shorter than ``max_len`` (and is empty for texts
        shorter than two characters).
    """
    target_ic = 0.0667  # approximate IC of English text
    ranked = []
    for key_len in range(1, max_len + 1):
        # The shortest column has len(cipher) // key_len characters. Below
        # two characters the IC is undefined, and this stays true for every
        # larger key length, so stop scanning here.
        if len(cipher) // key_len < 2:
            break
        # Slicing builds each column in one pass instead of repeated
        # string concatenation.
        columns = [cipher[i::key_len] for i in range(key_len)]
        avg_ic = np.mean([count_IC(column) for column in columns])
        ranked.append((abs(avg_ic - target_ic), key_len))
    ranked.sort()
    return [key_len for _, key_len in ranked]

2. 猜测key的可能值(字母频率分析)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
def frequency_analysis(group, num_candidates=3):
    """Rank the 26 Caesar shifts for one key position by English-likeness.

    Each shift is applied to ``group`` and scored with the inner product
    ``R = sum(P_i * Q_i)``, where ``P_i`` is the frequency of letter ``i`` in
    ``ENGLISH_FREQ`` and ``Q_i`` is its relative frequency in the shifted
    group. A higher ``R`` means the shifted text looks more like English.

    Args:
        group: Characters that were enciphered with the same key letter.
        num_candidates: How many of the best shifts to return.

    Returns:
        A list of at most ``num_candidates`` ``(key_letter, R)`` tuples,
        best first, where ``key_letter`` is ``chr(shift + ord('A'))``. For an
        empty ``group`` every shift scores 0 and the first shifts are
        returned in alphabetical order.
    """
    group_len = len(group)
    # Count the group once. Keeping each character's position modulo 26
    # gives the same shifted letters as shifting every character again for
    # each of the 26 candidate shifts.
    residue_counts = Counter((ord(char) - ord('A')) % 26 for char in group)

    scored = []
    for shift in range(26):
        shifted_freq = {
            chr(((residue - shift) % 26) + ord('A')): count
            for residue, count in residue_counts.items()
        }
        score = 0
        for char in ENGLISH_FREQ:
            # An empty group has no frequencies; score it as 0 instead of
            # dividing by zero.
            q_i = shifted_freq.get(char, 0) / group_len if group_len else 0.0
            score += ENGLISH_FREQ[char] * q_i
        scored.append((shift, score))
    scored.sort(key=lambda item: -item[1])  # highest score first
    return [(chr(shift + ord('A')), score) for shift, score in scored[:num_candidates]]

def guess_key(cipher, key_len, num_candidates=1):
    """Build candidate keys of length ``key_len``, best first.

    The text is split into ``key_len`` interleaved columns, and
    ``frequency_analysis`` proposes the best key letters for each column.
    Every combination of one proposal per column forms a candidate key,
    scored by the sum of its letters' ``R`` values.

    Args:
        cipher: The ciphertext to analyse.
        key_len: The key length to assume.
        num_candidates: Number of letters to keep per key position. The
            default of 1 yields exactly one key (the best letter at every
            position). Larger values yield ``num_candidates ** key_len``
            keys, so keep it small for long keys.

    Returns:
        A list of key strings ordered from highest to lowest total score.
    """
    columns = [cipher[i::key_len] for i in range(key_len)]
    per_position = [
        frequency_analysis(column, num_candidates=num_candidates)
        for column in columns
    ]
    scored_keys = []
    for combo in product(*per_position):
        key_str = ''.join(letter for letter, _ in combo)
        total_score = sum(score for _, score in combo)
        scored_keys.append((key_str, total_score))
    scored_keys.sort(key=lambda item: -item[1])  # highest total first
    return [key for key, _ in scored_keys]

完整代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
from collections import Counter
import numpy as np
from itertools import product

# Number of top-ranked key-length candidates to try.
KEYLENGTHNUM = 5

# Relative frequency of each letter in English text.
ENGLISH_FREQ = {
    'A': 0.08167,
    'B': 0.01492,
    'C': 0.02782,
    'D': 0.04253,
    'E': 0.12702,
    'F': 0.02228,
    'G': 0.02015,
    'H': 0.06094,
    'I': 0.06966,
    'J': 0.00153,
    'K': 0.00772,
    'L': 0.04025,
    'M': 0.02406,
    'N': 0.06749,
    'O': 0.07507,
    'P': 0.01929,
    'Q': 0.00095,
    'R': 0.05987,
    'S': 0.06327,
    'T': 0.09056,
    'U': 0.02758,
    'V': 0.00978,
    'W': 0.0236,
    'X': 0.0015,
    'Y': 0.01974,
    'Z': 0.00074,
}

def alpha(cipher):
    """Return the alphabetic characters of ``cipher``, upper-cased.

    Every character for which ``str.isalpha`` is false (spaces, digits,
    punctuation, ...) is dropped; the remaining ones keep their order.
    """
    letters = filter(str.isalpha, cipher)
    return ''.join(map(str.upper, letters))

def count_IC(cipher):
    """Return the index of coincidence (IC) of ``cipher``.

    The IC is the probability that two characters drawn at random, without
    replacement, from ``cipher`` are equal. It is computed as
    ``sum(f * (f - 1)) / (L * (L - 1))``, where ``f`` runs over the
    per-character counts and ``L`` is the length of the text.

    Args:
        cipher: The text to measure. Every character is counted as-is.

    Returns:
        The IC as a float, or ``0.0`` when ``cipher`` has fewer than two
        characters (no pair can be drawn, and the formula would otherwise
        divide by zero).
    """
    length = len(cipher)
    if length < 2:
        return 0.0
    freq = Counter(cipher)
    return sum(f * (f - 1) for f in freq.values()) / (length * (length - 1))

def guess_key_length(cipher, max_len=50):
    """Rank candidate key lengths using the index of coincidence.

    For every candidate length the text is split into that many interleaved
    columns (column ``i`` holds every ``key_len``-th character starting at
    offset ``i``). The columns' ICs are averaged and compared with 0.0667,
    the approximate IC of English text; the closer the average, the better
    the candidate.

    Args:
        cipher: The ciphertext to analyse.
        max_len: Largest key length to try (inclusive).

    Returns:
        Key lengths ordered from best to worst match. Lengths that would
        leave a column with fewer than two characters are not considered,
        so the list may be shorter than ``max_len`` (and is empty for texts
        shorter than two characters).
    """
    target_ic = 0.0667  # approximate IC of English text
    ranked = []
    for key_len in range(1, max_len + 1):
        # The shortest column has len(cipher) // key_len characters. Below
        # two characters the IC is undefined, and this stays true for every
        # larger key length, so stop scanning here.
        if len(cipher) // key_len < 2:
            break
        # Slicing builds each column in one pass instead of repeated
        # string concatenation.
        columns = [cipher[i::key_len] for i in range(key_len)]
        avg_ic = np.mean([count_IC(column) for column in columns])
        ranked.append((abs(avg_ic - target_ic), key_len))
    ranked.sort()
    return [key_len for _, key_len in ranked]

def frequency_analysis(group, num_candidates=3):
    """Rank the 26 Caesar shifts for one key position by English-likeness.

    Each shift is applied to ``group`` and scored with the inner product
    ``R = sum(P_i * Q_i)``, where ``P_i`` is the frequency of letter ``i`` in
    ``ENGLISH_FREQ`` and ``Q_i`` is its relative frequency in the shifted
    group. A higher ``R`` means the shifted text looks more like English.

    Args:
        group: Characters that were enciphered with the same key letter.
        num_candidates: How many of the best shifts to return.

    Returns:
        A list of at most ``num_candidates`` ``(key_letter, R)`` tuples,
        best first, where ``key_letter`` is ``chr(shift + ord('A'))``. For an
        empty ``group`` every shift scores 0 and the first shifts are
        returned in alphabetical order.
    """
    group_len = len(group)
    # Count the group once. Keeping each character's position modulo 26
    # gives the same shifted letters as shifting every character again for
    # each of the 26 candidate shifts.
    residue_counts = Counter((ord(char) - ord('A')) % 26 for char in group)

    scored = []
    for shift in range(26):
        shifted_freq = {
            chr(((residue - shift) % 26) + ord('A')): count
            for residue, count in residue_counts.items()
        }
        score = 0
        for char in ENGLISH_FREQ:
            # An empty group has no frequencies; score it as 0 instead of
            # dividing by zero.
            q_i = shifted_freq.get(char, 0) / group_len if group_len else 0.0
            score += ENGLISH_FREQ[char] * q_i
        scored.append((shift, score))
    scored.sort(key=lambda item: -item[1])  # highest score first
    return [(chr(shift + ord('A')), score) for shift, score in scored[:num_candidates]]

def guess_key(cipher, key_len, num_candidates=1):
    """Build candidate keys of length ``key_len``, best first.

    The text is split into ``key_len`` interleaved columns, and
    ``frequency_analysis`` proposes the best key letters for each column.
    Every combination of one proposal per column forms a candidate key,
    scored by the sum of its letters' ``R`` values.

    Args:
        cipher: The ciphertext to analyse.
        key_len: The key length to assume.
        num_candidates: Number of letters to keep per key position. The
            default of 1 yields exactly one key (the best letter at every
            position). Larger values yield ``num_candidates ** key_len``
            keys, so keep it small for long keys.

    Returns:
        A list of key strings ordered from highest to lowest total score.
    """
    columns = [cipher[i::key_len] for i in range(key_len)]
    per_position = [
        frequency_analysis(column, num_candidates=num_candidates)
        for column in columns
    ]
    scored_keys = []
    for combo in product(*per_position):
        key_str = ''.join(letter for letter, _ in combo)
        total_score = sum(score for _, score in combo)
        scored_keys.append((key_str, total_score))
    scored_keys.sort(key=lambda item: -item[1])  # highest total first
    return [key for key, _ in scored_keys]

def vigenere_decrypt(cipher, key):
    """Decrypt ``cipher`` with the Vigenere key ``key``.

    ASCII letters are shifted back by the matching key letter and keep their
    case. Non-alphabetic characters (spaces, punctuation, digits) are copied
    unchanged and do not consume a key position. Alphabetic characters
    outside A-Z/a-z are also copied unchanged, because no shift is defined
    for them, but they still consume a key position exactly as before.

    Args:
        cipher: The text to decrypt.
        key: The key. Letter case is ignored, so ``"lemon"`` and ``"LEMON"``
            decrypt identically.

    Returns:
        The decrypted text, the same length as ``cipher``.

    Raises:
        ValueError: If ``key`` is empty.
    """
    if not key:
        raise ValueError("key must not be empty")

    # Measure lowercase key letters from 'a' so that key case does not
    # change the shift.
    offsets = [
        ord(k) - ord('a') if 'a' <= k <= 'z' else ord(k) - ord('A')
        for k in key
    ]
    key_length = len(offsets)

    decrypted = []
    index = 0  # number of alphabetic characters seen so far
    for char in cipher:
        if not char.isalpha():
            decrypted.append(char)  # keep punctuation and whitespace
            continue
        offset = offsets[index % key_length]
        index += 1
        if 'A' <= char <= 'Z':
            value = (ord(char) - offset - ord('A')) % 26
            decrypted.append(chr(value + ord('A')))
        elif 'a' <= char <= 'z':
            value = (ord(char) - offset - ord('a')) % 26
            decrypted.append(chr(value + ord('a')))
        else:
            # Non-ASCII letter: leave it intact rather than folding it into
            # an unrelated ASCII letter.
            decrypted.append(char)
    return ''.join(decrypted)

if __name__ == '__main__':
    # Ciphertext to attack.
    cipher = """Zs vxb fxcnz evm enare usca tvdcs wgkduhtytero xjwmato xpjb jqjl krth bhzl tk hcbb nvb aolmb. Xhase
jsb bw zcah plm vigmpiwp ib bum gdaet, owe qbewpa jtbvc ijzmbb yqde smbq urilijk iwl nzc ob
ivmceqlg sszcp. Nlb lkzm cw n pmuoi iwl lws hwzm j pbuc. Azh zroubcoqwvnaf bm a ymbh ial woq
liem n kmmiyvrbl. Ibd pvccp gw y pepm xn emb bnmkt ial woq liem n aahkst. Jlq zclekqxv gw rha
lcktrar ob ilrnvkcs wrl hwh pyva e ajvpbsanc. """
    # Key recovery works on the letters only; decryption below runs on the
    # original text so spacing and punctuation are preserved.
    letters_only = alpha(cipher)
    candidate_lengths = guess_key_length(letters_only)[:KEYLENGTHNUM]
    print("候选密钥长度:", candidate_lengths)
    for length in candidate_lengths:
        keys = guess_key(letters_only, length)
        print(f"\n密钥长度 {length} 的可能密钥(按优先级排序):")
        for key in keys:
            decrypted_text = vigenere_decrypt(cipher, key)
            print(f"密钥: {key}")
            print(f"解密结果: {decrypted_text}")

原理参考文章:

重合指数攻击:

https://blog.csdn.net/forest_LL/article/details/135300302

字母频率分析:

https://www.cnblogs.com/ISGuXing/p/9665904.html