# Exercise 15 solutions

## Python

e15.py

```import urllib2      # to open file from a URL
from numpy import * # for arrays

# read in the ciphertext file from the interwebs
response = urllib2.urlopen("https://projecteuler.net/project/resources/p059_cipher.txt")
cipherascii = fromstring(responsestring, sep=",", dtype="int")
# now cipherascii is an array of integers corresponding to
# ascii codes

# here's a function to decrypt an array of ascii codes
# with a repeating decryption key (also ascii codes)
# using XOR bitwise decryption
#
def XORdecrypt(cipherascii, keyascii):
# initialize plainascii with zeros
plainascii = zeros(shape(cipherascii), dtype="int")
i = 0
keylen = len(keyascii)
while (i<len(cipherascii)):
plainascii[i] = cipherascii[i] ^ (keyascii[i % keylen]) # note the modulo operator
i = i +1
return plainascii

# for an example, let's try a guess at a cipherkey
#
cipherkey = "abc"
# convert cipherkey to an array of ascii codes
keyascii = array(map(ord, cipherkey), dtype="int")
plainascii = XORdecrypt(cipherascii, keyascii)
plainchars = map(chr, plainascii)
plainstring = ''.join(plainchars)
print plainstring # nope that doesn't look like English

# let's wrap that into a nice function
#
def getPlaintext(cipherascii, keystring):
keyascii = array(map(ord, keystring), dtype="int")
plainascii = XORdecrypt(cipherascii, keyascii)
plainchars = map(chr, plainascii)
plainstring = ''.join(plainchars)
return plainstring

# example:
#
ps = getPlaintext(cipherascii, "abc")
print ps

# ok now we can decrypt given a candidate key.
# now we have to decide if the key was correct
# i.e. correct if the resulting plaintext is English
# There are lots of ways we can proceed.
#
# Let's try this: we'll write a function that will
# take a given string of text as input, and return to us
# a "score" for how "english" it looks. Our "score" will
# simply be the number of words (separated by spaces) in the
# text that are found in an English dictionary

# here's some code that loads english dictionary words
# that are between three and 8 letters long
# from /usr/share/dict/words
#
# note this uses Python's "list comprehension" notation so
# it may look rather obscure
#
Ewords = [w.strip().lower() for w in open("/usr/share/dict/words") if ((len(w.strip())>=3) and (len(w.strip())<=8))]

# and here's a function to count the number of words,
# i.e. chunks of text separated by spaces " ",
# that are found in the dictionary
#
def EnglishScore(plainstring):
global Ewords
Pwords = plainstring.split(" ") # split by spaces
ecount = 0
# for each resulting "word" in the plaintext
for i in range(len(Pwords)):
# words length 3-8 only
if ( (len(Pwords[i]) >= 3) and (len(Pwords[i]) <= 8) ):
if Pwords[i] in Ewords:
ecount = ecount + 1
return ecount

# ok now let's try all possible keys (26 * 26 * 26 = 17576 of them)
# and for each keep track of the score
# and then keep the one with the max score
# 'a' is 97, 'z' is 122
# generate a matrix of all possible keys and initialize them to zero
#
keys = zeros((26*26*26,3), dtype="int")
i = 0
for i1 in range(97,123):
for i2 in range(97,123):
for i3 in range(97,123):
keys[i,:] = i1,i2,i3
i = i + 1

print shape(keys)

# let's keep track of each key's score
#
scores = zeros((shape(keys),1)) # initialize all to zero

# ok now let's score plaintext for all keys
#
bestscore = 0.0  # best score so far
bestkey = "aaa"  # best key so far
# for each row (key)
for i in range(shape(keys)):
# slightly obscure code here I admit
keystring = ''.join(map(chr,keys[i,:]))
# get the plaintext
ps = getPlaintext(cipherascii, keystring)
# score the plaintext
score = EnglishScore(ps)
scores[i] = score
# is score better than best so far?
if (score > bestscore):
bestkey = keystring
bestscore = score
print "%6d (%s) score=%3d bestscore = %3d bestkey = %s" % (i,keystring,score,bestscore,bestkey)

# and here is the best score, best key and corresponding plaintext
#
print bestscore
print bestkey
ps = getPlaintext(cipherascii, bestkey)
print ps
```

## MATLAB / Octave

e15.m

```% load in the cipher1.txt file into an array of ints
%
cipher = importdata('p059_cipher.txt', ',');

% import a dictionary of English words
%
w = importdata('/usr/share/dict/words', '\n');

% test all 26*26*26 = 17576 keys
%
alpha = double('a'):double('z'); % 97:122
allkeys = combvec(alpha,alpha,alpha)'; % 17576 x 3 matrix of all of them

% let's test all keys
%
bestkey = 1;
bestscore = 0;
for i=1:size(allkeys,1)
pa = XORdecrypt(cipher, allkeys(i,:)); % decrypt into plaintext array
p = char(pa);                          % convert to characters
pw = strsplit(p, ' ');                 % split plaintext by ' ' into words
% count how many plaintext words are found in the dictionary
nwords = 0;
for j=1:length(pw)
% only words length 3 through 8
if ( (length(pw{j}) >= 3) & (length(pw{j}) <= 8) )
if ~isempty(find(strcmp(pw{j},w))) % is it in the dictionary?
nwords = nwords + 1;
end
end
end
if nwords > bestscore                  % do we have a new best score?
bestscore = nwords;
bestkey = i;
end
disp([num2str(i),' ',char(allkeys(i,:)),' score= ',num2str(nwords),' bestscore= ',num2str(bestscore),' bestkey= ',char(allkeys(bestkey,:))]);
end

% here is the best plaintext
p = char(XORdecrypt(cipher, allkeys(bestkey,:)));
disp(['plaintext:\n',p]);
% and the key that produced it
disp(['best key: ',char(allkeys(bestkey,:))]);
```

XORdecrypt.m

```function out = XORdecrypt(in,key)
out = ones(size(in)) * NaN; % initialize with NaNs
keyarray = repmat(key, 1, ceil(length(in)/length(key)));
keyarray = keyarray(1:length(in));
out = bitxor(in, keyarray);
```

## C

e15.c

```// to compile: gcc -O3 -o e15 e15.c e15helper.c

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "e15helper.h"  // header file for e15helper.c

int main(int argc, char *argv[]) {

// read the p059_cipher.txt file into a int_array struct
printf("read in %d integers into cipherarray\n", cipherarray->n);

// brute force method, try every 3-letter key
// there are 26*26*26 = 17576 possibilities
// 'a'=97, 'z'=122

char key[] = "aaa";
char best_key[] = "aaa";
int_array *plainarray;
double s, best_s = 99999.0;
int i,j,k;
for (i=97; i<=122; i++) {
for (j=97; j<=122; j++) {
for (k=97; k<=122; k++) {
key = i;
key = j;
key = k;
plainarray = XORdecrypt(cipherarray, key);
s = EnglishScore(plainarray);
if (s < best_s) {
best_s = s;
strcpy(best_key, key);
}
printf("%s %5.3f %s %5.3f\n", key, s, best_key, best_s);
int_array_destroy(plainarray);
}
}
}

plainarray = XORdecrypt(cipherarray, best_key);
char *plaintext = int_array2string(plainarray);
printf("plaintext:\n%s\nbest_key:%s\n", plaintext, best_key);

// project Euler actually asks for the sum of the ASCII values in the plaintext
//
long int the_sum = 0;
for (i=0; i<plainarray->n; i++) {
the_sum = the_sum + plainarray->array[i];
}
printf("sum of ASCII values in plaintext = %ld\n", the_sum);

free(plaintext);
int_array_destroy(plainarray);
int_array_destroy(cipherarray);
return 0;
}
```

e15helper.h

```// define a struct to store an array of ints
typedef struct {
int n;
int *array;
} int_array;

// free memory of an int_array
void int_array_destroy(int_array *ia);

// read cipher1.txt and put ints into an int_array

// decrypt int_array using 3-character string
int_array *XORdecrypt(int_array *cipherarray, char key);

// convert int_array into a character string
char *int_array2string(int_array *ia);

// score an int_array on how "english" it is
// (lower values -> more "english")
double EnglishScore(int_array *b);
```

e15helper.c

```#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include "e15helper.h"

// a function to free int_array memory
//
void int_array_destroy(int_array *ia) {
if (ia->array != NULL) free(ia->array);
free(ia);
}

// a function to read the p059_cipher.txt file and return an int_array
//
FILE *fid = fopen(fname, "r");  // open the ciphertext file
long filesize;
fseek(fid, 0L, SEEK_END);       // count how many characters in the file
filesize = ftell(fid);
rewind(fid);
char buf[filesize];             // allocate a character buffer to store file contents
fscanf(fid, "%s", buf);         // read in the file into the char buffer
fclose(fid);
int n_cipher = 0;               // parse char buffer into an array of integers
char *token;
char *bufcpy = strdup(buf);
token = strtok(bufcpy, ",");    // split by comma
while (token) {
n_cipher++;                   // count how many there are
token = strtok(NULL, ",");
}
free(bufcpy);
// allocate an array of ints
int *ia = malloc(n_cipher * sizeof(int));
bufcpy = strdup(buf);
token = strtok(bufcpy, ",");   // read tokens into the int array
int i=0;
while (token) {
ia[i] = atoi(token);
token = strtok(NULL, ",");
i++;
}
free(bufcpy);
free(token);
// stick the data into a new int_array struct
int_array *cipherarray = malloc(sizeof(cipherarray));
cipherarray->n = n_cipher;
cipherarray->array = ia;
return cipherarray;
}

// a function to take a candidate key and the cipherarray and
// produce candidate plaintext int_array
//
int_array *XORdecrypt(int_array *cipherarray, char key) {
int_array *plainarray = malloc(sizeof(int_array));
plainarray->n = cipherarray->n;
plainarray->array = malloc(plainarray->n * sizeof(int));
int i=0;
while (i<plainarray->n) {
plainarray->array[i] = cipherarray->array[i] ^ (int) key[i % 3];
i++;
}
return plainarray;
}

// a function to take an int_array and generate the
// character string equivalent
//
char *int_array2string(int_array *ia) {
// leave room for NULL termination
char *ca = malloc( ((ia->n)+1) * sizeof(char) );
int i;
for (i=0; i<ia->n; i++) {
ca[i] = (char) ia->array[i];
}
ca[i] = '\0'; // NULL terminate the string
return ca;
}

// a function to take an int_array and score how "English" it is
// according to letter frequencies
// lower values -> more "English"
//
double EnglishScore(int_array *b) {
//
// based on expected frequencies of single characters in english
// http://en.wikipedia.org/wiki/Letter_frequency
//
// a  b  c  d  e  f  g  h  i  j  k  l  m  n  o  p  q  r  s  t  u  v  w  x  y  z
// 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
//
// frequencies of English a-z
double letterfreqs[] = {      0.08000, 0.01536, 0.02576, 0.03179, 0.12576, 0.03505,
0.01983, 0.06237, 0.06920, 0.00145, 0.00740, 0.04057,
0.02561, 0.06904, 0.07591, 0.01796, 0.00118, 0.05959,
0.06341, 0.09085, 0.02842, 0.00982, 0.02225, 0.00180,
0.01901, 0.00079 };

int bfreqs;          // to store freqs of plaintext chars
int i;
for (i=0; i<256; i++) {
bfreqs[i] = 0;          // initialize to zero
}
int blen = b->n;
int ic;
for (i=0; i<blen; i++) {
ic = (unsigned int) b->array[i];
if ((ic >= 'A') & (ic <= 'Z')) {    // uppercase letter
ic += ('a' - 'A');                // convert to lowercase
}
bfreqs[ic]++;                  // increment counter for the given letter
}
double score = 0.0;
double diff;
for (i=0; i<256; i++) {          // compare plaintext freqs to English freqs
if ((i >= 'a') & (i <= 'z')) { // only include a-z
diff = ((double) bfreqs[i]) - (letterfreqs[i-'a'] * ((double) b->n));
}
else {
diff = (double) bfreqs[i];
}
score += fabs(diff) / ((double) b->n);
}
return score;
}
```

Paul Gribble | fall 2014 