Exercise 15 solutions
Table of Contents
Python
e15.py
import urllib2 # to open file from a URL from numpy import * # for arrays # read in the ciphertext file from the interwebs response = urllib2.urlopen("https://projecteuler.net/project/resources/p059_cipher.txt") responsestring = response.read() cipherascii = fromstring(responsestring, sep=",", dtype="int") # now cipherascii is an array of integers corresponding to # ascii codes # here's a function to decrypt an array of ascii codes # with a repeating decryption key (also ascii codes) # using XOR bitwise decryption # def XORdecrypt(cipherascii, keyascii): # initialize plainascii with zeros plainascii = zeros(shape(cipherascii), dtype="int") i = 0 keylen = len(keyascii) while (i<len(cipherascii)): plainascii[i] = cipherascii[i] ^ (keyascii[i % keylen]) # note the modulo operator i = i +1 return plainascii # for an example, let's try a guess at a cipherkey # cipherkey = "abc" # convert cipherkey to an array of ascii codes keyascii = array(map(ord, cipherkey), dtype="int") plainascii = XORdecrypt(cipherascii, keyascii) plainchars = map(chr, plainascii) plainstring = ''.join(plainchars) print plainstring # nope that doesn't look like English # let's wrap that into a nice function # def getPlaintext(cipherascii, keystring): keyascii = array(map(ord, keystring), dtype="int") plainascii = XORdecrypt(cipherascii, keyascii) plainchars = map(chr, plainascii) plainstring = ''.join(plainchars) return plainstring # example: # ps = getPlaintext(cipherascii, "abc") print ps # ok now we can decrypt given a candidate key. # now we have to decide if the key was correct # i.e. correct if the resulting plaintext is English # There are lots of ways we can proceed. # # Let's try this: we'll write a function that will # take a given string of text as input, and return to us # a "score" for how "english" it looks. Our "score" will # simply be the number of words (separated by spaces) in the # text that are found in an English dictionary # here's some code that loads english dictionary words # that are between three and 8 letters long # from /usr/share/dict/words # # note this uses Python's "list comprehension" notation so # it may look rather obscure # Ewords = [w.strip().lower() for w in open("/usr/share/dict/words") if ((len(w.strip())>=3) and (len(w.strip())<=8))] # and here's a function to count the number of words, # i.e. chunks of text separated by spaces " ", # that are found in the dictionary # def EnglishScore(plainstring): global Ewords Pwords = plainstring.split(" ") # split by spaces ecount = 0 # for each resulting "word" in the plaintext for i in range(len(Pwords)): # words length 3-8 only if ( (len(Pwords[i]) >= 3) and (len(Pwords[i]) <= 8) ): if Pwords[i] in Ewords: ecount = ecount + 1 return ecount # ok now let's try all possible keys (26 * 26 * 26 = 17576 of them) # and for each keep track of the score # and then keep the one with the max score # 'a' is 97, 'z' is 122 # generate a matrix of all possible keys and initialize them to zero # keys = zeros((26*26*26,3), dtype="int") i = 0 for i1 in range(97,123): for i2 in range(97,123): for i3 in range(97,123): keys[i,:] = i1,i2,i3 i = i + 1 print shape(keys) # let's keep track of each key's score # scores = zeros((shape(keys)[0],1)) # initialize all to zero # ok now let's score plaintext for all keys # bestscore = 0.0 # best score so far bestkey = "aaa" # best key so far # for each row (key) for i in range(shape(keys)[0]): # slightly obscure code here I admit keystring = ''.join(map(chr,keys[i,:])) # get the plaintext ps = getPlaintext(cipherascii, keystring) # score the plaintext score = EnglishScore(ps) scores[i] = score # is score better than best so far? if (score > bestscore): bestkey = keystring bestscore = score print "%6d (%s) score=%3d bestscore = %3d bestkey = %s" % (i,keystring,score,bestscore,bestkey) # and here is the best score, best key and corresponding plaintext # print bestscore print bestkey ps = getPlaintext(cipherascii, bestkey) print ps
MATLAB / Octave
e15.m
% load in the cipher1.txt file into an array of ints % cipher = importdata('p059_cipher.txt', ','); % import a dictionary of English words % w = importdata('/usr/share/dict/words', '\n'); % test all 26*26*26 = 17576 keys % alpha = double('a'):double('z'); % 97:122 allkeys = combvec(alpha,alpha,alpha)'; % 17576 x 3 matrix of all of them % let's test all keys % bestkey = 1; bestscore = 0; for i=1:size(allkeys,1) pa = XORdecrypt(cipher, allkeys(i,:)); % decrypt into plaintext array p = char(pa); % convert to characters pw = strsplit(p, ' '); % split plaintext by ' ' into words % count how many plaintext words are found in the dictionary nwords = 0; for j=1:length(pw) % only words length 3 through 8 if ( (length(pw{j}) >= 3) & (length(pw{j}) <= 8) ) if ~isempty(find(strcmp(pw{j},w))) % is it in the dictionary? nwords = nwords + 1; end end end if nwords > bestscore % do we have a new best score? bestscore = nwords; bestkey = i; end disp([num2str(i),' ',char(allkeys(i,:)),' score= ',num2str(nwords),' bestscore= ',num2str(bestscore),' bestkey= ',char(allkeys(bestkey,:))]); end % here is the best plaintext p = char(XORdecrypt(cipher, allkeys(bestkey,:))); disp(['plaintext:\n',p]); % and the key that produced it disp(['best key: ',char(allkeys(bestkey,:))]);
XORdecrypt.m
function out = XORdecrypt(in,key) out = ones(size(in)) * NaN; % initialize with NaNs keyarray = repmat(key, 1, ceil(length(in)/length(key))); keyarray = keyarray(1:length(in)); out = bitxor(in, keyarray);
C
e15.c
// to compile: gcc -O3 -o e15 e15.c e15helper.c #include <stdio.h> #include <stdlib.h> #include <string.h> #include "e15helper.h" // header file for e15helper.c int main(int argc, char *argv[]) { // read the p059_cipher.txt file into a int_array struct int_array *cipherarray = readCipherFile("p059_cipher.txt"); printf("read in %d integers into cipherarray\n", cipherarray->n); // brute force method, try every 3-letter key // there are 26*26*26 = 17576 possibilities // 'a'=97, 'z'=122 char key[] = "aaa"; char best_key[] = "aaa"; int_array *plainarray; double s, best_s = 99999.0; int i,j,k; for (i=97; i<=122; i++) { for (j=97; j<=122; j++) { for (k=97; k<=122; k++) { key[0] = i; key[1] = j; key[2] = k; plainarray = XORdecrypt(cipherarray, key); s = EnglishScore(plainarray); if (s < best_s) { best_s = s; strcpy(best_key, key); } printf("%s %5.3f %s %5.3f\n", key, s, best_key, best_s); int_array_destroy(plainarray); } } } plainarray = XORdecrypt(cipherarray, best_key); char *plaintext = int_array2string(plainarray); printf("plaintext:\n%s\nbest_key:%s\n", plaintext, best_key); // project Euler actually asks for the sum of the ASCII values in the plaintext // long int the_sum = 0; for (i=0; i<plainarray->n; i++) { the_sum = the_sum + plainarray->array[i]; } printf("sum of ASCII values in plaintext = %ld\n", the_sum); free(plaintext); int_array_destroy(plainarray); int_array_destroy(cipherarray); return 0; }
e15helper.h
// define a struct to store an array of ints typedef struct { int n; int *array; } int_array; // free memory of an int_array void int_array_destroy(int_array *ia); // read cipher1.txt and put ints into an int_array int_array *readCipherFile(char fname[]); // decrypt int_array using 3-character string int_array *XORdecrypt(int_array *cipherarray, char key[3]); // convert int_array into a character string char *int_array2string(int_array *ia); // score an int_array on how "english" it is // (lower values -> more "english") double EnglishScore(int_array *b);
e15helper.c
#include <stdio.h> #include <string.h> #include <stdlib.h> #include <math.h> #include "e15helper.h" // a function to free int_array memory // void int_array_destroy(int_array *ia) { if (ia->array != NULL) free(ia->array); free(ia); } // a function to read the p059_cipher.txt file and return an int_array // int_array *readCipherFile(char fname[]) { FILE *fid = fopen(fname, "r"); // open the ciphertext file long filesize; fseek(fid, 0L, SEEK_END); // count how many characters in the file filesize = ftell(fid); rewind(fid); char buf[filesize]; // allocate a character buffer to store file contents fscanf(fid, "%s", buf); // read in the file into the char buffer fclose(fid); int n_cipher = 0; // parse char buffer into an array of integers char *token; char *bufcpy = strdup(buf); token = strtok(bufcpy, ","); // split by comma while (token) { n_cipher++; // count how many there are token = strtok(NULL, ","); } free(bufcpy); // allocate an array of ints int *ia = malloc(n_cipher * sizeof(int)); bufcpy = strdup(buf); token = strtok(bufcpy, ","); // read tokens into the int array int i=0; while (token) { ia[i] = atoi(token); token = strtok(NULL, ","); i++; } free(bufcpy); free(token); // stick the data into a new int_array struct int_array *cipherarray = malloc(sizeof(cipherarray)); cipherarray->n = n_cipher; cipherarray->array = ia; return cipherarray; } // a function to take a candidate key and the cipherarray and // produce candidate plaintext int_array // int_array *XORdecrypt(int_array *cipherarray, char key[3]) { int_array *plainarray = malloc(sizeof(int_array)); plainarray->n = cipherarray->n; plainarray->array = malloc(plainarray->n * sizeof(int)); int i=0; while (i<plainarray->n) { plainarray->array[i] = cipherarray->array[i] ^ (int) key[i % 3]; i++; } return plainarray; } // a function to take an int_array and generate the // character string equivalent // char *int_array2string(int_array *ia) { // leave room for NULL termination char *ca = malloc( ((ia->n)+1) * sizeof(char) ); int i; for (i=0; i<ia->n; i++) { ca[i] = (char) ia->array[i]; } ca[i] = '\0'; // NULL terminate the string return ca; } // a function to take an int_array and score how "English" it is // according to letter frequencies // lower values -> more "English" // double EnglishScore(int_array *b) { // // based on expected frequencies of single characters in english // http://en.wikipedia.org/wiki/Letter_frequency // // a b c d e f g h i j k l m n o p q r s t u v w x y z // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 // // frequencies of English a-z double letterfreqs[] = { 0.08000, 0.01536, 0.02576, 0.03179, 0.12576, 0.03505, 0.01983, 0.06237, 0.06920, 0.00145, 0.00740, 0.04057, 0.02561, 0.06904, 0.07591, 0.01796, 0.00118, 0.05959, 0.06341, 0.09085, 0.02842, 0.00982, 0.02225, 0.00180, 0.01901, 0.00079 }; int bfreqs[256]; // to store freqs of plaintext chars int i; for (i=0; i<256; i++) { bfreqs[i] = 0; // initialize to zero } int blen = b->n; int ic; for (i=0; i<blen; i++) { ic = (unsigned int) b->array[i]; if ((ic >= 'A') & (ic <= 'Z')) { // uppercase letter ic += ('a' - 'A'); // convert to lowercase } bfreqs[ic]++; // increment counter for the given letter } double score = 0.0; double diff; for (i=0; i<256; i++) { // compare plaintext freqs to English freqs if ((i >= 'a') & (i <= 'z')) { // only include a-z diff = ((double) bfreqs[i]) - (letterfreqs[i-'a'] * ((double) b->n)); } else { diff = (double) bfreqs[i]; } score += fabs(diff) / ((double) b->n); } return score; }