Exercise 15 solutions

Python
MATLAB / Octave
C

Python

e15.py

import urllib2      # to open file from a URL
from numpy import * # for arrays

# read in the ciphertext file from the interwebs
response = urllib2.urlopen("https://projecteuler.net/project/resources/p059_cipher.txt")
responsestring = response.read()
cipherascii = fromstring(responsestring, sep=",", dtype="int")
# now cipherascii is an array of integers corresponding to 
# ascii codes

# here's a function to decrypt an array of ascii codes
# with a repeating decryption key (also ascii codes)
# using XOR bitwise decryption
#
def XORdecrypt(cipherascii, keyascii):
        # initialize plainascii with zeros
        plainascii = zeros(shape(cipherascii), dtype="int")
        i = 0
        keylen = len(keyascii)
        while (i<len(cipherascii)):
                plainascii[i] = cipherascii[i] ^ (keyascii[i % keylen]) # note the modulo operator
                i = i +1
        return plainascii

# for an example, let's try a guess at a cipherkey
#
cipherkey = "abc"
# convert cipherkey to an array of ascii codes
keyascii = array(map(ord, cipherkey), dtype="int")
plainascii = XORdecrypt(cipherascii, keyascii)
plainchars = map(chr, plainascii)
plainstring = ''.join(plainchars)
print plainstring # nope that doesn't look like English

# let's wrap that into a nice function
#
def getPlaintext(cipherascii, keystring):
        keyascii = array(map(ord, keystring), dtype="int")
        plainascii = XORdecrypt(cipherascii, keyascii)
        plainchars = map(chr, plainascii)
        plainstring = ''.join(plainchars)
        return plainstring

# example:
#
ps = getPlaintext(cipherascii, "abc")
print ps

# ok now we can decrypt given a candidate key.
# now we have to decide if the key was correct
# i.e. correct if the resulting plaintext is English
# There are lots of ways we can proceed.
#
# Let's try this: we'll write a function that will
# take a given string of text as input, and return to us
# a "score" for how "english" it looks. Our "score" will
# simply be the number of words (separated by spaces) in the
# text that are found in an English dictionary

# here's some code that loads english dictionary words
# that are between three and 8 letters long
# from /usr/share/dict/words
#
# note this uses Python's "list comprehension" notation so
# it may look rather obscure
#
Ewords = [w.strip().lower() for w in open("/usr/share/dict/words") if ((len(w.strip())>=3) and (len(w.strip())<=8))]

# and here's a function to count the number of words,
# i.e. chunks of text separated by spaces " ",
# that are found in the dictionary
#
def EnglishScore(plainstring):
        global Ewords 
        Pwords = plainstring.split(" ") # split by spaces
        ecount = 0
        # for each resulting "word" in the plaintext
        for i in range(len(Pwords)):
                # words length 3-8 only
                if ( (len(Pwords[i]) >= 3) and (len(Pwords[i]) <= 8) ):
                        if Pwords[i] in Ewords:
                                ecount = ecount + 1
        return ecount

# ok now let's try all possible keys (26 * 26 * 26 = 17576 of them)
# and for each keep track of the score
# and then keep the one with the max score
# 'a' is 97, 'z' is 122
# generate a matrix of all possible keys and initialize them to zero
#
keys = zeros((26*26*26,3), dtype="int")
i = 0
for i1 in range(97,123):
        for i2 in range(97,123):
                for i3 in range(97,123):
                        keys[i,:] = i1,i2,i3
                        i = i + 1

print shape(keys)

# let's keep track of each key's score
#
scores = zeros((shape(keys)[0],1)) # initialize all to zero

# ok now let's score plaintext for all keys
#
bestscore = 0.0  # best score so far
bestkey = "aaa"  # best key so far
# for each row (key)
for i in range(shape(keys)[0]):
        # slightly obscure code here I admit
        keystring = ''.join(map(chr,keys[i,:]))
        # get the plaintext
        ps = getPlaintext(cipherascii, keystring)
        # score the plaintext
        score = EnglishScore(ps)
        scores[i] = score
        # is score better than best so far?
        if (score > bestscore):
                bestkey = keystring
                bestscore = score
        print "%6d (%s) score=%3d bestscore = %3d bestkey = %s" % (i,keystring,score,bestscore,bestkey)

# and here is the best score, best key and corresponding plaintext
#
print bestscore
print bestkey
ps = getPlaintext(cipherascii, bestkey)
print ps

MATLAB / Octave

e15.m

% load in the cipher1.txt file into an array of ints
%
cipher = importdata('p059_cipher.txt', ',');

% import a dictionary of English words
%
w = importdata('/usr/share/dict/words', '\n');

% test all 26*26*26 = 17576 keys
%
alpha = double('a'):double('z'); % 97:122
allkeys = combvec(alpha,alpha,alpha)'; % 17576 x 3 matrix of all of them

% let's test all keys
%
bestkey = 1;
bestscore = 0;
for i=1:size(allkeys,1)
  pa = XORdecrypt(cipher, allkeys(i,:)); % decrypt into plaintext array
  p = char(pa);                          % convert to characters
  pw = strsplit(p, ' ');                 % split plaintext by ' ' into words
  % count how many plaintext words are found in the dictionary
  nwords = 0;
  for j=1:length(pw)
    % only words length 3 through 8
    if ( (length(pw{j}) >= 3) & (length(pw{j}) <= 8) )
      if ~isempty(find(strcmp(pw{j},w))) % is it in the dictionary?
        nwords = nwords + 1;
      end
    end
  end
  if nwords > bestscore                  % do we have a new best score?
    bestscore = nwords;
    bestkey = i;
  end
  disp([num2str(i),' ',char(allkeys(i,:)),' score= ',num2str(nwords),' bestscore= ',num2str(bestscore),' bestkey= ',char(allkeys(bestkey,:))]);
end

% here is the best plaintext
p = char(XORdecrypt(cipher, allkeys(bestkey,:)));
disp(['plaintext:\n',p]);
% and the key that produced it
disp(['best key: ',char(allkeys(bestkey,:))]);

XORdecrypt.m

function out = XORdecrypt(in,key)
  out = ones(size(in)) * NaN; % initialize with NaNs
  keyarray = repmat(key, 1, ceil(length(in)/length(key)));
  keyarray = keyarray(1:length(in));
  out = bitxor(in, keyarray);

C

e15.c

// to compile: gcc -O3 -o e15 e15.c e15helper.c

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "e15helper.h"  // header file for e15helper.c

int main(int argc, char *argv[]) {

  // read the p059_cipher.txt file into a int_array struct
  int_array *cipherarray = readCipherFile("p059_cipher.txt");
  printf("read in %d integers into cipherarray\n", cipherarray->n);

  // brute force method, try every 3-letter key
  // there are 26*26*26 = 17576 possibilities
  // 'a'=97, 'z'=122

  char key[] = "aaa";
  char best_key[] = "aaa";
  int_array *plainarray;
  double s, best_s = 99999.0;
  int i,j,k;
  for (i=97; i<=122; i++) {
    for (j=97; j<=122; j++) {
      for (k=97; k<=122; k++) {
        key[0] = i;
        key[1] = j;
        key[2] = k;
        plainarray = XORdecrypt(cipherarray, key);
        s = EnglishScore(plainarray);
        if (s < best_s) {
          best_s = s;
          strcpy(best_key, key);
        }
        printf("%s %5.3f %s %5.3f\n", key, s, best_key, best_s);
        int_array_destroy(plainarray);
      }
    }
  }     

  plainarray = XORdecrypt(cipherarray, best_key);
  char *plaintext = int_array2string(plainarray);
  printf("plaintext:\n%s\nbest_key:%s\n", plaintext, best_key);

  // project Euler actually asks for the sum of the ASCII values in the plaintext
  //
  long int the_sum = 0;
  for (i=0; i<plainarray->n; i++) {
    the_sum = the_sum + plainarray->array[i];
  }
  printf("sum of ASCII values in plaintext = %ld\n", the_sum);

  free(plaintext);
  int_array_destroy(plainarray);
  int_array_destroy(cipherarray);
  return 0;
}

e15helper.h

// define a struct to store an array of ints
typedef struct {
  int n;
  int *array;
} int_array;

// free memory of an int_array
void int_array_destroy(int_array *ia);

// read cipher1.txt and put ints into an int_array
int_array *readCipherFile(char fname[]);

// decrypt int_array using 3-character string
int_array *XORdecrypt(int_array *cipherarray, char key[3]);

// convert int_array into a character string
char *int_array2string(int_array *ia);

// score an int_array on how "english" it is
// (lower values -> more "english")
double EnglishScore(int_array *b);

e15helper.c

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include "e15helper.h"

// a function to free int_array memory
//
void int_array_destroy(int_array *ia) {
  if (ia->array != NULL) free(ia->array);
  free(ia);
}

// a function to read the p059_cipher.txt file and return an int_array
//
int_array *readCipherFile(char fname[]) {
  FILE *fid = fopen(fname, "r");  // open the ciphertext file
  long filesize;
  fseek(fid, 0L, SEEK_END);       // count how many characters in the file
  filesize = ftell(fid);
  rewind(fid);
  char buf[filesize];             // allocate a character buffer to store file contents
  fscanf(fid, "%s", buf);         // read in the file into the char buffer
  fclose(fid);
  int n_cipher = 0;               // parse char buffer into an array of integers
  char *token;
  char *bufcpy = strdup(buf);
  token = strtok(bufcpy, ",");    // split by comma
  while (token) {
    n_cipher++;                   // count how many there are
    token = strtok(NULL, ",");
  }
  free(bufcpy);
  // allocate an array of ints
  int *ia = malloc(n_cipher * sizeof(int));
  bufcpy = strdup(buf);
  token = strtok(bufcpy, ",");   // read tokens into the int array
  int i=0;
  while (token) {
    ia[i] = atoi(token);
    token = strtok(NULL, ",");
    i++;
  }
  free(bufcpy);
  free(token);
  // stick the data into a new int_array struct
  int_array *cipherarray = malloc(sizeof(cipherarray));
  cipherarray->n = n_cipher;
  cipherarray->array = ia;
  return cipherarray;
}

// a function to take a candidate key and the cipherarray and
// produce candidate plaintext int_array
//
int_array *XORdecrypt(int_array *cipherarray, char key[3]) {
  int_array *plainarray = malloc(sizeof(int_array));
  plainarray->n = cipherarray->n;
  plainarray->array = malloc(plainarray->n * sizeof(int));
  int i=0;
  while (i<plainarray->n) {
    plainarray->array[i] = cipherarray->array[i] ^ (int) key[i % 3];
    i++;
  }
  return plainarray;
}

// a function to take an int_array and generate the
// character string equivalent
//
char *int_array2string(int_array *ia) {
  // leave room for NULL termination
  char *ca = malloc( ((ia->n)+1) * sizeof(char) );
  int i;
  for (i=0; i<ia->n; i++) {
    ca[i] = (char) ia->array[i];
  }
  ca[i] = '\0'; // NULL terminate the string
  return ca;
}

// a function to take an int_array and score how "English" it is
// according to letter frequencies
// lower values -> more "English"
//
double EnglishScore(int_array *b) {
  //
  // based on expected frequencies of single characters in english
  // http://en.wikipedia.org/wiki/Letter_frequency
  //
  // a  b  c  d  e  f  g  h  i  j  k  l  m  n  o  p  q  r  s  t  u  v  w  x  y  z
  // 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
  //
  // frequencies of English a-z
  double letterfreqs[] = {      0.08000, 0.01536, 0.02576, 0.03179, 0.12576, 0.03505,
                                0.01983, 0.06237, 0.06920, 0.00145, 0.00740, 0.04057,
                                0.02561, 0.06904, 0.07591, 0.01796, 0.00118, 0.05959,
                                0.06341, 0.09085, 0.02842, 0.00982, 0.02225, 0.00180,
                                0.01901, 0.00079 };

  int bfreqs[256];          // to store freqs of plaintext chars
  int i;
  for (i=0; i<256; i++) {
    bfreqs[i] = 0;          // initialize to zero
  }
  int blen = b->n;
  int ic;
  for (i=0; i<blen; i++) {
    ic = (unsigned int) b->array[i];
    if ((ic >= 'A') & (ic <= 'Z')) {    // uppercase letter
      ic += ('a' - 'A');                // convert to lowercase
    }
    bfreqs[ic]++;                  // increment counter for the given letter
  }
  double score = 0.0;
  double diff;
  for (i=0; i<256; i++) {          // compare plaintext freqs to English freqs
    if ((i >= 'a') & (i <= 'z')) { // only include a-z
      diff = ((double) bfreqs[i]) - (letterfreqs[i-'a'] * ((double) b->n));
    }
    else {
      diff = (double) bfreqs[i];
    }
    score += fabs(diff) / ((double) b->n);
  }
  return score;
}

Exercise 15 solutions

Table of Contents

Python

MATLAB / Octave

C