Log In
New Account
  
 
Home My Page Project Tree Code Snippets Project Openings
 

Browse | Submit A New Snippet | Create A Package

 

Solving Substitution Ciphers with Genetics Algorithm

Type:
Full Script
Category:
Searching
License:
BSD License
Language:
Perl
 
Description:
This is a slightly modified implementation of the genetics algorithm, described in "Solving Substitution Ciphers with Genetics Algorithm" by Joe Gester - http://www.cs.rochester.edu/u/brown/Crypto/studprojs/SubstGen.pdf
A corpus of English text should be supplied - in this sample I pasted in the corpus.txt file "The Secret Garden" by G.K. Chesterton - http://eserver.org/fiction/innocence/secretgarden.html

Versions Of This Snippet::

Vladimir Tsvetkov
Snippet ID Download Version Date Posted Author Delete
261.02005-03-21 13:17Vladimir Tsvetkov

Download a raw-text version of this code by clicking on "Download Version"

 


Latest Snippet Version: :1.0

#########################################################################
# Solving Substitution Ciphers with Genetics Algorithm!			#
# 	(c) Vladimir Tsvetkov, 2005					#
# 	For selfsatisfaction only!					#
#########################################################################

#!/usr/bin/perl

use warnings;
use strict;

###############################################################################
# cypher text that is about to be cracked:
my $cypher_text = "BTKIB" . "OKIBR" . "BAARJ" . "ZGBON" . "QBBSH" . "OZIBM" 
		. "BAAGB" . "ONZAH" . "RANMH" . "OZIBI" . "JQSAB" . "WKNHM"
		. "KTLBJ" . "ZZIBR" . "BWKQH" . "RWKHA" . "HHPJU" . "HVKIB"
		. "OJWQK" . "HRHWQ" . "BORIJ" . "KRJZX" . "HTWXK" . "HIJSS"
		. "BWWBC" . "K";

###############################################################################
# almost randomly chosen corpus in English:
# use this to calculate the digram and trigram frequency tables:
my $corpus = "";

###############################################################################
# capitalize all letters in the corpus, remove spaces, numbers and punctuation:
sub prepare_corpus {
	open (CORPUSFILE, '<', "corpus.txt")
		or die "Can't open file! $!";
	my @contents = <CORPUSFILE>;
	close (CORPUSFILE);
	foreach my $paragraph (@contents) {
		$corpus .= $paragraph;
	}
	$corpus =~ tr/a-z/A-Z/;	# capitalize
	$corpus =~ s/\s//g;	# remove spaces
	$corpus =~ s/\W//g;	# remove punctuation and other symbols
	$corpus =~ s/\d//g;	# remove digits
}

prepare_corpus ();

###############################################################################
# alphabeth we use:
my $alphabeth = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";

###############################################################################
# letters distribution for the alphabeth we use (in %):
# if you are using another alphabet, you should change this table!
my %letter_distribution_table = (
	'E' => 12.31,	 'L' => 4.03, 	'B' => 1.62,
	'T' => 9.59,	 'D' => 3.65, 	'G' => 1.61,
	'A' => 8.05,	 'C' => 3.20, 	'V' => 0.93,
	'O' => 7.94,	 'U' => 3.10, 	'K' => 0.52,
	'N' => 7.19,	 'P' => 2.29, 	'Q' => 0.20,
	'I' => 7.18,	 'F' => 2.28, 	'X' => 0.20,
	'S' => 6.59,	 'M' => 2.25, 	'J' => 0.10,
	'R' => 6.03,	 'W' => 2.03, 	'Z' => 0.09,
	'H' => 5.14,	 'Y' => 1.88
);

###############################################################################
# digram frequency table:
my %digram_frequency_table = ();

# this will populate the %digram_frequency_table:
sub calculate_digram_frequency_table {
	my $i = 1;
	foreach my $letter (split (//, $corpus)) {
		last if ($i == length $corpus);
		my $digram = $letter . substr ($corpus, $i, 1);
		$i ++;
		if (defined $digram_frequency_table{$digram}) {
			$digram_frequency_table{$digram} ++;
		} else {
			$digram_frequency_table{$digram} = 1;
		}
	}
}

calculate_digram_frequency_table ();

###############################################################################
# trigram frequency table:
my %trigram_frequency_table = ();

# this will populate the %trigram_frequency_table:
sub calculate_trigram_frequency_table {
	my $i = 1;
	foreach my $letter (split (//, $corpus)) {
		last if ($i == (length $corpus) - 1);
		my $trigram = $letter . substr ($corpus, $i, 2);
		$i ++;
		if (defined $trigram_frequency_table{$trigram}) {
			$trigram_frequency_table{$trigram} ++;
		} else {
			$trigram_frequency_table{$trigram} = 1;
		}
	}
}

calculate_trigram_frequency_table ();

###############################################################################
# implementing the crossover between individuals:
# as long as less than half of the key is swapped at any one time, at least 
# the good information from one parent remains in the child and likely the 
# majority of the good information from the other parent also remains
# Example:
# father - A (B  C  D) E  F
# mother - B  D  F  E  C  A
# the crossover takes the next 3 steps:
# 1. tr/BD/DB/
# 2. tr/CF/FC/
# 3. tr/DE/ED/
# child  - E  B  C  D  F  A
sub crossover {
	my ($father, $mother) = @_;
	my $letters_count = length $alphabeth;
	my $crossover_fragment_length = 12;
	my $fragment_offset = 
		int (rand ($letters_count - $crossover_fragment_length + 1));

	my $child = $mother;
	my $father_fragment = substr ($father,
				$fragment_offset, 
				$crossover_fragment_length);

	my $mother_fragment = substr ($mother, 
				$fragment_offset, 
				$crossover_fragment_length);

	# swap the randomly chosen fragments:
	my $i = 0;
	foreach my $father_letter (split (//, $father_fragment)) {
		my $mother_letter = substr ($mother_fragment, $i, 1);
		$i ++;
		my $digram_left = $father_letter . $mother_letter;
		my $digram_right = $mother_letter . $father_letter;
		$_ = $child;
		eval "tr/$digram_left/$digram_right/";
		$child = $_;
	}

	return $child;
}

###############################################################################
# implementing the fitness function for a given individual:
# To apply the fitness function to an individual, the cipher-text is decrypted 
# using the individual’s gene as it’s key. Then every trigram and digram in 
# the decrypted text is looked up in the table of how many times it occurs 
# in the corpus. These numbers are then summed. Thus, trigrams and bigrams 
# that occur commonly in the corpus are more heavily rewarded than those 
# that do not.
sub fitness {
	my ($individual) = @_;
	my $permutated_alphabeth = $individual;
	my $plain_text = $cypher_text;

	# decrypt, using the given individual:
	$_ = $plain_text;
	eval "tr/$permutated_alphabeth/$alphabeth/";
	$plain_text = $_;

	# calculate the fitness function of the decrypted text:
	my $fitness = 0;
	my $i = 1;
	foreach my $letter (split (//, $plain_text)) {
		unless ($i == length $plain_text) {
			my $next_letter = substr ($plain_text, $i, 1);
			my $digram = $letter . $next_letter;
			$fitness += $digram_frequency_table{$digram}
				if (defined $digram_frequency_table{$digram});
			unless ($i == (length $corpus) - 1) {
				my $next_two_letters = substr ($plain_text, $i, 2);
				my $trigram = $letter . $next_two_letters;
				$fitness += $trigram_frequency_table{$trigram}
					if (defined $trigram_frequency_table{$trigram});
			} else {
				$i ++;
				next;
			}
		} else {
			last;
		}
		$i ++;
	}

	return $fitness;
}

###############################################################################
# implementing mutation:
# This is implemented as a swap between probable neighbors based on the 
# single letter frequencies of English. That is, the character ’e’ might 
# be swapped with ’t’ but not with ’v’ or ’x’.
# In this case the random swap occurs only for the first 9 most frequent 
# letters, based on the %letter_distribution_table.
sub mutation {
	my ($individual) = @_;
	my $mutated_individual = $individual;
	my $letters_count = 9;
	my $letter_position1 = int (rand ($letters_count));
	my $letter_position2 = int (rand ($letters_count));
	return $mutated_individual if ($letter_position1 == $letter_position2);
	my @letters_sorted_by_distribution = 
		reverse
			sort  { $letter_distribution_table{$a} <=>
				$letter_distribution_table{$b} }
				keys %letter_distribution_table;
	my $letter1 = $letters_sorted_by_distribution[$letter_position1];
	my $letter2 = $letters_sorted_by_distribution[$letter_position2];
	my $index1 = index ($alphabeth, $letter1);
	my $index2 = index ($alphabeth, $letter2);
	my @letters = split (//, $individual);
	my $swap_letter1 = $letters[$index1];
	my $swap_letter2 = $letters[$index2];
	my $digram_left = $swap_letter1 . $swap_letter2;
	my $digram_right = $swap_letter2 . $swap_letter1;
	$_ = $mutated_individual;
	eval "tr/$digram_left/$digram_right/";
	$mutated_individual = $_;
	return $mutated_individual;
}

###############################################################################
# generate random individual (generates random permutation of the alphabeth):
sub generate_random_individual {
	my @letters = split (//, $alphabeth);
	my $random_individual = '';
	foreach my $i (reverse (1 .. length $alphabeth)) {
		my $pos = int (rand $i);
		$random_individual .= $letters[$pos];
		# delete the concatenated element from @letters:
		unless ($pos == $#letters) {
			my $temp = $letters[$#letters];
			$letters[$#letters] = $letters[$pos];
			$letters[$pos] = $temp;
			pop @letters;
		} else {
			pop @letters;
		}
	}
	return $random_individual;
}

###############################################################################
# number of populations:
my $populations_count = 20;

###############################################################################
# number of individuals in a population:
my $individuals_count = 20;

###############################################################################
# array of populations:
my @populations;

###############################################################################
# describe a population of indiliduals:
# my $population = {
#	'individuals' => [],
#	'total_fitness' => 0
# };

###############################################################################
# describe an individual:
# my $individual = {
#	'description' => '',
#	'fitness' => 0
# };

###############################################################################
# sort individuals in a population by the fitness function:
sub sort_individuals {
	my ($population) = @_;
	@{$population->{'individuals'}} = 
		reverse sort { $a->{'fitness'} <=> $b->{'fitness'} }
			@{$population->{'individuals'}};
}

###############################################################################
# sort populations by the cumulative fitness values:
sub sort_populations {
	@populations = 
		reverse sort { $a->{'total_fitness'} <=> $b->{'total_fitness'} }
			@populations;
}

###############################################################################
# initialize the first generation:
sub zero_generation {
	foreach (1 .. $populations_count) {
		my $population = {};
		$population->{'individuals'} = [];
		$population->{'total_fitness'} = 0;
		foreach (1 .. $individuals_count) {
			my $individual = {};
			$individual->{'description'} = '';
			$individual->{'fitness'} = 0;
			my $random_individual = generate_random_individual ();
			$individual->{'description'} = $random_individual;
			my $fitness = fitness ($random_individual);
			$individual->{'fitness'} = $fitness;
			push (@{$population->{'individuals'}}, $individual);
			$population->{'total_fitness'} += $fitness;
		}
		sort_individuals ($population);
		push (@populations, $population);
	}
	sort_populations ();
}

###############################################################################
# number of individuals in a population that will mutate:
my $mutate_individuals_count = 4;

###############################################################################
# mutate some randomly chosen individuals in a population:
# A random range of $mutate_individuals_count individuals from a single 
# population will mutate.
sub mutate_population {
	my ($population) = @_;
	my $offset = int 
		(rand ($individuals_count - $mutate_individuals_count));
	foreach my $i (1 .. $mutate_individuals_count) {
		my $individual = 
			${$population->{'individuals'}}[$offset + $i];
		my $mutated_description = 
			mutation ($individual->{'description'});
		$individual->{'description'} = $mutated_description;
		$population->{'total_fitness'} -= $individual->{'fitness'};
		$individual->{'fitness'} = 
			fitness ($mutated_description);
		$population->{'total_fitness'} += $individual->{'fitness'};
	}
	sort_individuals ($population);
}

###############################################################################
# mutate all populations:
sub mutate_all_populations {
	foreach my $population (@populations) {
		mutate_population ($population);
	}
	sort_populations ();
}

###############################################################################
# breed the individuals in a population:
# The father is chosen among the 5 best individuals in a population.
# The mother is chosen among the rest individuals in the same population.
# Their child takes the place of the weakest individual in the population.
sub breeding {
	my ($population) = @_;
	my $father_pos = int (rand (5));
	my $father_individual = ${$population->{'individuals'}}[$father_pos];
	my $mother_pos = int (rand ($individuals_count - 5)) + 5;
	my $mother_individual = ${$population->{'individuals'}}[$mother_pos];
	my $child_individual = 
		${$population->{'individuals'}}[$individuals_count - 1];
	my $father = $father_individual->{'description'};
	my $mother = $mother_individual->{'description'};
	my $child = crossover ($father, $mother);
	$child_individual->{'description'} = $child;
	$population->{'total_fitness'} -= $child_individual->{'fitness'};
	$child_individual->{'fitness'} = fitness ($child);
	$population->{'total_fitness'} += $child_individual->{'fitness'};
	sort_individuals ($population);
}

###############################################################################
# breed all populations:
sub breed_all_populations {
	foreach my $population (@populations) {
		breeding ($population);
	}
	sort_populations ();
}

###############################################################################
# interbreeding between neighbour populations:
# The father is the best individual in the first population.
# The mother is randomly chosen individual from the next weakest population.
# Their child takes the place of the weakest individual in the mother's
# population. I thought it's quite reasonable to interbreed only neighbour
# populations (populations that have close values for the total_fitness).
sub interbreeding {
	foreach my $i (0 .. $populations_count - 2) {
		my $father_population = $populations[$i];
		my $father_individual = 
			${$father_population->{'individuals'}}[0];
		my $mother_population = $populations[$i + 1];
		my $mother_pos = int (rand ($individuals_count));
		my $mother_individual = 
			${$father_population->{'individuals'}}[$mother_pos];
		my $child_individual = 
			${$mother_population->{'individuals'}}[$individuals_count - 1];
		my $father = $father_individual->{'description'};
		my $mother = $mother_individual->{'description'};
		my $child = crossover ($father, $mother);
		$child_individual->{'description'} = $child;
		$mother_population->{'total_fitness'} -= 
			$child_individual->{'fitness'};
		$child_individual->{'fitness'} = fitness ($child);
		$mother_population->{'total_fitness'} += 
			$child_individual->{'fitness'};
		sort_individuals ($mother_population);
	}
	sort_populations ();
}

###############################################################################
# clone an individual:
sub clone_individual {
	my ($individual) = @_;
	my $cloned_individual = {};
	$cloned_individual->{'description'} = $individual->{'description'};
	$cloned_individual->{'fitness'} = $individual->{'fitness'};
	return $cloned_individual;
}

###############################################################################
# sort array of individuals:
sub sort_best_individuals {
	my ($ref_individuals) = @_;
	@$ref_individuals = 
		reverse sort { $a->{'fitness'} <=> $b->{'fitness'} }
			@$ref_individuals;
}

###############################################################################
# the best individuals throughout all generations:
my @best_individuals;

###############################################################################
# check if this individual is not already in the @best_individuals array:
sub is_member_individual {
	my ($ref_individuals, $individual) = @_;
	foreach my $member_individual (@$ref_individuals) {
		if ($member_individual->{'description'} eq 
		    $individual->{'description'}) {
			return "TRUE";
		}
	}
	return undef;
}

###############################################################################
# merges to sorted lists of individuals:
sub merge_individuals {
	my ($ref_candidate_individuals) = @_;
	my @new_individuals = (@best_individuals);
	foreach my $candidate_individual (@$ref_candidate_individuals) {
		push (@new_individuals, $candidate_individual) 
			unless (is_member_individual (\@new_individuals, $candidate_individual));
	}
	sort_best_individuals (\@new_individuals);
	$#new_individuals = $populations_count - 1;
	@best_individuals = @new_individuals;
}

###############################################################################
# select the best individuals, and if they are better than the existing ones,
# change them:
# This is a way to save the local maxima for all generations in the evolution.
sub select_best_individuals {
	if (@best_individuals) {
		my @best_individuals_in_this_generation;
		foreach my $population (@populations) {
			my $individual = 
				clone_individual (${$population->{'individuals'}}[0]);
			push (@best_individuals_in_this_generation, $individual);
		}
		sort_best_individuals (\@best_individuals_in_this_generation);
		merge_individuals (\@best_individuals_in_this_generation);
	} else {
		# first initialization of @best_individuals;
		foreach my $population (@populations) {
			my $individual = 
				clone_individual (${$population->{'individuals'}}[0]);
			push (@best_individuals, $individual);
		}
		sort_best_individuals (\@best_individuals);
	}
}

###############################################################################
# calculate cumulative fitness for all populations in a generation:
sub cumulative_fitness {
	my $cumulative_fitness = 0;
	foreach my $population (@populations) {
		$cumulative_fitness += $population->{'total_fitness'};
	}
	return $cumulative_fitness;
}

###############################################################################
# calculate next generation:
sub next_generation {
	breed_all_populations ();
	select_best_individuals ();
	interbreeding ();
	select_best_individuals ();
	mutate_all_populations ();
	select_best_individuals ();
}

###############################################################################
# decrypt the cypher text, using the individual's description:
sub decrypt_individual {
	my ($individual) = @_;
	my $permutated_alphabeth = $individual->{'description'};
	my $plain_text = $cypher_text;
	# decrypt, using the given individual:
	$_ = $plain_text;
	eval "tr/$permutated_alphabeth/$alphabeth/";
	$plain_text = $_;
	return $plain_text;
}

###############################################################################
# prints all suggestions about the decription of the cypher text:
sub print_results {
	open (RESULTS, '>', "decriptions.txt") 
		or die "Can't open file! $!";
	foreach my $population (@populations) {
		foreach my $individual (@{$population->{'individuals'}}) {
			my $plain_text = decrypt_individual ($individual);
			print RESULTS "$plain_text\n";
		}
	}
	print RESULTS "\nBEST SUGGESTIONS:\n";
	foreach my $individual (@best_individuals) {
		my $plain_text = decrypt_individual ($individual);
		print RESULTS "$plain_text\n";
	}
	close (RESULTS);
}

###############################################################################
# number of generations that are allowed to be without an improvement:
# on every improvement increment this value!
# on every worsening decrement this value!
my $generations_without_improvement = 10;

###############################################################################
# simulation of the evolution process:
sub evolution {
	my $cumulative_fitness = 0;
	zero_generation ();
	select_best_individuals ();
	$cumulative_fitness = cumulative_fitness ();
	my $generations = 0;
	while ($generations_without_improvement) {
		print "GENERATION $generations\t\tFITNESS: $cumulative_fitness\tGENERATIONS LEFT: $generations_without_improvement\n";
		$generations ++;
		next_generation ();
		my $new_cumulative_fitness = cumulative_fitness ();
		if ($new_cumulative_fitness > $cumulative_fitness) {
			$generations_without_improvement ++;
			$cumulative_fitness = $new_cumulative_fitness;
		} else {
			$generations_without_improvement --;
		}
	}
	print_results ();
}

evolution ();

		

Submit a new version

You can submit a new version of this snippet if you have modified it and you feel it is appropriate to share with others..

 


Powered By GForge Collaborative Development Environment