vigenere.rs
// Implemented based on the techniques described in | |
// the notes by Shai Simonson: 'Public Key Crpytography' | |
// http://web.stonehill.edu/compsci/Shai_papers/RSA.pdf | |
// (Page 9-10) | |
use std::f32; | |
// Index of Coincidence for English text | |
// http://www.cs.mtu.edu/~shene/NSF-4/Tutorial/VIG/Vig-IOC.html | |
const IOC: f32 = 0.066332; | |
// Relative frequencies of English letter | |
// H. Beker and F. Piper, Cipher Systems, Wiley-Interscience, 1982. | |
const FREQS: [f32; 26] = [8.167, 1.492, 2.782, 4.253, 12.702, 2.228, 2.015, 6.094, 6.996, 0.153, | |
0.772, 4.025, 2.406, 6.749, 7.507, 1.929, 0.095, 5.987, 6.327, 9.056, | |
2.758, 0.978, 2.360, 0.150, 1.974, 0.074]; | |
const MIN_SHIFT: usize = 3; | |
const MAX_SHIFT: usize = 10; | |
fn m(x: i8, y: i8) -> i8 { | |
((x % y) + y) % y | |
} | |
fn find_codeword_len(cipher_text: &[u8]) -> Option<usize> { | |
// Try all the lens from MIN to MAX | |
(MIN_SHIFT..MAX_SHIFT) | |
.map(|shift| ( | |
(shift, | |
// For each shift length, iterate over the cipher text | |
cipher_text.iter() | |
// And at the same time iterate with the same | |
// cipher text but shifted by `shift` | |
.zip(cipher_text.iter().skip(shift).cycle()) | |
// Keep only the bytes which match in the cipher text | |
// and the shifted cipher text | |
.filter(|&(&x, &y)| x == y) | |
// Now count how many bytes matched | |
.count() as f32) | |
)) | |
// Now, go over each shift and the corresponding number of matches | |
// to calculate the Index of Coincidence | |
.map(|(n, k)| (n, k as f32 / cipher_text.len() as f32)) | |
// Next, calculate the distance from our reference value for English text | |
.map(|(n, k)| (n, (k - IOC).abs())) | |
// Finally, take the closest value | |
.fold((None, f32::INFINITY), |(n, k), (m, b)| { | |
if k < b { | |
(n, k) | |
} else { | |
(Some(m), b) | |
} | |
}) | |
.0 | |
} | |
fn find_codeword(cipher_text: &[u8], len: usize) -> String { | |
let mut codeword = String::with_capacity(len); | |
// We group the cipher text into columns that share a key | |
let columns = (0..len).map(|skip| { | |
// Iterate over the cipher text | |
cipher_text.iter() | |
// Keeping track of the index | |
.enumerate() | |
// Now keep the relevant ones | |
.filter(|&(i, _)| i % len == skip) | |
// We don't need the index anymore | |
.map(|(_, &x)| x) | |
// And let's put that all in a vector | |
.collect::<Vec<_>>() | |
}); | |
// Now let's process each column in turn | |
for column in columns { | |
let mut best_shift = 0; | |
let mut best_norm = f32::INFINITY; | |
// Each column is essentially encoded as a caeser chipher so we try | |
// all 26 possibilities and compare the frequency distributions | |
for shift in 0..26 { | |
// Now let's find the # of occurrences of each letter | |
let mut freqs = [0.0f32; 26]; | |
let len = column.iter() | |
// Take the ascii value, turn it 0-based, shift it and mod by 26 | |
.map(|&x| m(x as i8 - b'A' as i8 - shift as i8, 26)) | |
// Update our counts | |
.inspect(|x| freqs[*x as usize] += 1.) | |
// and finally just return the total which we use to normalize with | |
.count(); | |
// Use the square difference to determine a measure of similarity | |
// to typical english text. | |
let norm: f32 = freqs.iter() | |
// first we normalize each count by the total | |
// and multiply by 100 so we match our reference data | |
.map(|&x| x / len as f32 * 100.) | |
// also iterate over the reference values for english text | |
.zip(FREQS.iter()) | |
// the square of the difference | |
.map(|(x, &y)| (x - y).powi(2)) | |
// and finally sum it all up | |
.fold(0., |x, y| x + y); | |
// Update our values if we find anything better | |
if norm < best_norm { | |
best_norm = norm; | |
best_shift = shift; | |
} | |
} | |
// Update our codeword with our best guess | |
codeword.push((best_shift + b'A') as char); | |
} | |
codeword | |
} | |
fn main() { | |
let cipher_text = include_str!("cipher1.txt").trim(); | |
println!("Cipher Text:\n{}\n", cipher_text); | |
if let Some(len) = find_codeword_len(cipher_text.as_ref()) { | |
println!("Probable Codeword Len: {}\n", len); | |
let codeword = find_codeword(cipher_text.as_ref(), len); | |
println!("Code word is probably: {}\n", codeword); | |
let decoded = cipher_text.bytes() | |
.zip(codeword.bytes().cycle()) | |
.map(|(x, y)| m(x as i8 - y as i8, 26)) | |
.map(|x| (x as u8 + b'A') as char) | |
.collect::<String>(); | |
println!("Decoded Text:\n{}", decoded); | |
} else { | |
panic!("Unable to determine codeword len."); | |
} | |
} |