 |
Browse
| Submit A New Snippet
| Create A Package
Versions Of This Snippet::
Download a raw-text version of this code by clicking on "Download Version"
Latest Snippet Version: :1.0
#########################################################################
# Solving Substitution Ciphers with Genetics Algorithm! #
# (c) Vladimir Tsvetkov, 2005 #
# For selfsatisfaction only! #
#########################################################################
#!/usr/bin/perl
use warnings;
use strict;
###############################################################################
# cypher text that is about to be cracked:
my $cypher_text = "BTKIB" . "OKIBR" . "BAARJ" . "ZGBON" . "QBBSH" . "OZIBM"
. "BAAGB" . "ONZAH" . "RANMH" . "OZIBI" . "JQSAB" . "WKNHM"
. "KTLBJ" . "ZZIBR" . "BWKQH" . "RWKHA" . "HHPJU" . "HVKIB"
. "OJWQK" . "HRHWQ" . "BORIJ" . "KRJZX" . "HTWXK" . "HIJSS"
. "BWWBC" . "K";
###############################################################################
# almost randomly chosen corpus in English:
# use this to calculate the digram and trigram frequency tables:
my $corpus = "";
###############################################################################
# capitalize all letters in the corpus, remove spaces, numbers and punctuation:
sub prepare_corpus {
open (CORPUSFILE, '<', "corpus.txt")
or die "Can't open file! $!";
my @contents = <CORPUSFILE>;
close (CORPUSFILE);
foreach my $paragraph (@contents) {
$corpus .= $paragraph;
}
$corpus =~ tr/a-z/A-Z/; # capitalize
$corpus =~ s/\s//g; # remove spaces
$corpus =~ s/\W//g; # remove punctuation and other symbols
$corpus =~ s/\d//g; # remove digits
}
prepare_corpus ();
###############################################################################
# alphabeth we use:
my $alphabeth = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
###############################################################################
# letters distribution for the alphabeth we use (in %):
# if you are using another alphabet, you should change this table!
my %letter_distribution_table = (
'E' => 12.31, 'L' => 4.03, 'B' => 1.62,
'T' => 9.59, 'D' => 3.65, 'G' => 1.61,
'A' => 8.05, 'C' => 3.20, 'V' => 0.93,
'O' => 7.94, 'U' => 3.10, 'K' => 0.52,
'N' => 7.19, 'P' => 2.29, 'Q' => 0.20,
'I' => 7.18, 'F' => 2.28, 'X' => 0.20,
'S' => 6.59, 'M' => 2.25, 'J' => 0.10,
'R' => 6.03, 'W' => 2.03, 'Z' => 0.09,
'H' => 5.14, 'Y' => 1.88
);
###############################################################################
# digram frequency table:
my %digram_frequency_table = ();
# this will populate the %digram_frequency_table:
sub calculate_digram_frequency_table {
my $i = 1;
foreach my $letter (split (//, $corpus)) {
last if ($i == length $corpus);
my $digram = $letter . substr ($corpus, $i, 1);
$i ++;
if (defined $digram_frequency_table{$digram}) {
$digram_frequency_table{$digram} ++;
} else {
$digram_frequency_table{$digram} = 1;
}
}
}
calculate_digram_frequency_table ();
###############################################################################
# trigram frequency table:
my %trigram_frequency_table = ();
# this will populate the %trigram_frequency_table:
sub calculate_trigram_frequency_table {
my $i = 1;
foreach my $letter (split (//, $corpus)) {
last if ($i == (length $corpus) - 1);
my $trigram = $letter . substr ($corpus, $i, 2);
$i ++;
if (defined $trigram_frequency_table{$trigram}) {
$trigram_frequency_table{$trigram} ++;
} else {
$trigram_frequency_table{$trigram} = 1;
}
}
}
calculate_trigram_frequency_table ();
###############################################################################
# implementing the crossover between individuals:
# as long as less than half of the key is swapped at any one time, at least
# the good information from one parent remains in the child and likely the
# majority of the good information from the other parent also remains
# Example:
# father - A (B C D) E F
# mother - B D F E C A
# the crossover takes the next 3 steps:
# 1. tr/BD/DB/
# 2. tr/CF/FC/
# 3. tr/DE/ED/
# child - E B C D F A
sub crossover {
my ($father, $mother) = @_;
my $letters_count = length $alphabeth;
my $crossover_fragment_length = 12;
my $fragment_offset =
int (rand ($letters_count - $crossover_fragment_length + 1));
my $child = $mother;
my $father_fragment = substr ($father,
$fragment_offset,
$crossover_fragment_length);
my $mother_fragment = substr ($mother,
$fragment_offset,
$crossover_fragment_length);
# swap the randomly chosen fragments:
my $i = 0;
foreach my $father_letter (split (//, $father_fragment)) {
my $mother_letter = substr ($mother_fragment, $i, 1);
$i ++;
my $digram_left = $father_letter . $mother_letter;
my $digram_right = $mother_letter . $father_letter;
$_ = $child;
eval "tr/$digram_left/$digram_right/";
$child = $_;
}
return $child;
}
###############################################################################
# implementing the fitness function for a given individual:
# To apply the fitness function to an individual, the cipher-text is decrypted
# using the individual’s gene as it’s key. Then every trigram and digram in
# the decrypted text is looked up in the table of how many times it occurs
# in the corpus. These numbers are then summed. Thus, trigrams and bigrams
# that occur commonly in the corpus are more heavily rewarded than those
# that do not.
sub fitness {
my ($individual) = @_;
my $permutated_alphabeth = $individual;
my $plain_text = $cypher_text;
# decrypt, using the given individual:
$_ = $plain_text;
eval "tr/$permutated_alphabeth/$alphabeth/";
$plain_text = $_;
# calculate the fitness function of the decrypted text:
my $fitness = 0;
my $i = 1;
foreach my $letter (split (//, $plain_text)) {
unless ($i == length $plain_text) {
my $next_letter = substr ($plain_text, $i, 1);
my $digram = $letter . $next_letter;
$fitness += $digram_frequency_table{$digram}
if (defined $digram_frequency_table{$digram});
unless ($i == (length $corpus) - 1) {
my $next_two_letters = substr ($plain_text, $i, 2);
my $trigram = $letter . $next_two_letters;
$fitness += $trigram_frequency_table{$trigram}
if (defined $trigram_frequency_table{$trigram});
} else {
$i ++;
next;
}
} else {
last;
}
$i ++;
}
return $fitness;
}
###############################################################################
# implementing mutation:
# This is implemented as a swap between probable neighbors based on the
# single letter frequencies of English. That is, the character ’e’ might
# be swapped with ’t’ but not with ’v’ or ’x’.
# In this case the random swap occurs only for the first 9 most frequent
# letters, based on the %letter_distribution_table.
sub mutation {
my ($individual) = @_;
my $mutated_individual = $individual;
my $letters_count = 9;
my $letter_position1 = int (rand ($letters_count));
my $letter_position2 = int (rand ($letters_count));
return $mutated_individual if ($letter_position1 == $letter_position2);
my @letters_sorted_by_distribution =
reverse
sort { $letter_distribution_table{$a} <=>
$letter_distribution_table{$b} }
keys %letter_distribution_table;
my $letter1 = $letters_sorted_by_distribution[$letter_position1];
my $letter2 = $letters_sorted_by_distribution[$letter_position2];
my $index1 = index ($alphabeth, $letter1);
my $index2 = index ($alphabeth, $letter2);
my @letters = split (//, $individual);
my $swap_letter1 = $letters[$index1];
my $swap_letter2 = $letters[$index2];
my $digram_left = $swap_letter1 . $swap_letter2;
my $digram_right = $swap_letter2 . $swap_letter1;
$_ = $mutated_individual;
eval "tr/$digram_left/$digram_right/";
$mutated_individual = $_;
return $mutated_individual;
}
###############################################################################
# generate random individual (generates random permutation of the alphabeth):
sub generate_random_individual {
my @letters = split (//, $alphabeth);
my $random_individual = '';
foreach my $i (reverse (1 .. length $alphabeth)) {
my $pos = int (rand $i);
$random_individual .= $letters[$pos];
# delete the concatenated element from @letters:
unless ($pos == $#letters) {
my $temp = $letters[$#letters];
$letters[$#letters] = $letters[$pos];
$letters[$pos] = $temp;
pop @letters;
} else {
pop @letters;
}
}
return $random_individual;
}
###############################################################################
# number of populations:
my $populations_count = 20;
###############################################################################
# number of individuals in a population:
my $individuals_count = 20;
###############################################################################
# array of populations:
my @populations;
###############################################################################
# describe a population of indiliduals:
# my $population = {
# 'individuals' => [],
# 'total_fitness' => 0
# };
###############################################################################
# describe an individual:
# my $individual = {
# 'description' => '',
# 'fitness' => 0
# };
###############################################################################
# sort individuals in a population by the fitness function:
sub sort_individuals {
my ($population) = @_;
@{$population->{'individuals'}} =
reverse sort { $a->{'fitness'} <=> $b->{'fitness'} }
@{$population->{'individuals'}};
}
###############################################################################
# sort populations by the cumulative fitness values:
sub sort_populations {
@populations =
reverse sort { $a->{'total_fitness'} <=> $b->{'total_fitness'} }
@populations;
}
###############################################################################
# initialize the first generation:
sub zero_generation {
foreach (1 .. $populations_count) {
my $population = {};
$population->{'individuals'} = [];
$population->{'total_fitness'} = 0;
foreach (1 .. $individuals_count) {
my $individual = {};
$individual->{'description'} = '';
$individual->{'fitness'} = 0;
my $random_individual = generate_random_individual ();
$individual->{'description'} = $random_individual;
my $fitness = fitness ($random_individual);
$individual->{'fitness'} = $fitness;
push (@{$population->{'individuals'}}, $individual);
$population->{'total_fitness'} += $fitness;
}
sort_individuals ($population);
push (@populations, $population);
}
sort_populations ();
}
###############################################################################
# number of individuals in a population that will mutate:
my $mutate_individuals_count = 4;
###############################################################################
# mutate some randomly chosen individuals in a population:
# A random range of $mutate_individuals_count individuals from a single
# population will mutate.
sub mutate_population {
my ($population) = @_;
my $offset = int
(rand ($individuals_count - $mutate_individuals_count));
foreach my $i (1 .. $mutate_individuals_count) {
my $individual =
${$population->{'individuals'}}[$offset + $i];
my $mutated_description =
mutation ($individual->{'description'});
$individual->{'description'} = $mutated_description;
$population->{'total_fitness'} -= $individual->{'fitness'};
$individual->{'fitness'} =
fitness ($mutated_description);
$population->{'total_fitness'} += $individual->{'fitness'};
}
sort_individuals ($population);
}
###############################################################################
# mutate all populations:
sub mutate_all_populations {
foreach my $population (@populations) {
mutate_population ($population);
}
sort_populations ();
}
###############################################################################
# breed the individuals in a population:
# The father is chosen among the 5 best individuals in a population.
# The mother is chosen among the rest individuals in the same population.
# Their child takes the place of the weakest individual in the population.
sub breeding {
my ($population) = @_;
my $father_pos = int (rand (5));
my $father_individual = ${$population->{'individuals'}}[$father_pos];
my $mother_pos = int (rand ($individuals_count - 5)) + 5;
my $mother_individual = ${$population->{'individuals'}}[$mother_pos];
my $child_individual =
${$population->{'individuals'}}[$individuals_count - 1];
my $father = $father_individual->{'description'};
my $mother = $mother_individual->{'description'};
my $child = crossover ($father, $mother);
$child_individual->{'description'} = $child;
$population->{'total_fitness'} -= $child_individual->{'fitness'};
$child_individual->{'fitness'} = fitness ($child);
$population->{'total_fitness'} += $child_individual->{'fitness'};
sort_individuals ($population);
}
###############################################################################
# breed all populations:
sub breed_all_populations {
foreach my $population (@populations) {
breeding ($population);
}
sort_populations ();
}
###############################################################################
# interbreeding between neighbour populations:
# The father is the best individual in the first population.
# The mother is randomly chosen individual from the next weakest population.
# Their child takes the place of the weakest individual in the mother's
# population. I thought it's quite reasonable to interbreed only neighbour
# populations (populations that have close values for the total_fitness).
sub interbreeding {
foreach my $i (0 .. $populations_count - 2) {
my $father_population = $populations[$i];
my $father_individual =
${$father_population->{'individuals'}}[0];
my $mother_population = $populations[$i + 1];
my $mother_pos = int (rand ($individuals_count));
my $mother_individual =
${$father_population->{'individuals'}}[$mother_pos];
my $child_individual =
${$mother_population->{'individuals'}}[$individuals_count - 1];
my $father = $father_individual->{'description'};
my $mother = $mother_individual->{'description'};
my $child = crossover ($father, $mother);
$child_individual->{'description'} = $child;
$mother_population->{'total_fitness'} -=
$child_individual->{'fitness'};
$child_individual->{'fitness'} = fitness ($child);
$mother_population->{'total_fitness'} +=
$child_individual->{'fitness'};
sort_individuals ($mother_population);
}
sort_populations ();
}
###############################################################################
# clone an individual:
sub clone_individual {
my ($individual) = @_;
my $cloned_individual = {};
$cloned_individual->{'description'} = $individual->{'description'};
$cloned_individual->{'fitness'} = $individual->{'fitness'};
return $cloned_individual;
}
###############################################################################
# sort array of individuals:
sub sort_best_individuals {
my ($ref_individuals) = @_;
@$ref_individuals =
reverse sort { $a->{'fitness'} <=> $b->{'fitness'} }
@$ref_individuals;
}
###############################################################################
# the best individuals throughout all generations:
my @best_individuals;
###############################################################################
# check if this individual is not already in the @best_individuals array:
sub is_member_individual {
my ($ref_individuals, $individual) = @_;
foreach my $member_individual (@$ref_individuals) {
if ($member_individual->{'description'} eq
$individual->{'description'}) {
return "TRUE";
}
}
return undef;
}
###############################################################################
# merges to sorted lists of individuals:
sub merge_individuals {
my ($ref_candidate_individuals) = @_;
my @new_individuals = (@best_individuals);
foreach my $candidate_individual (@$ref_candidate_individuals) {
push (@new_individuals, $candidate_individual)
unless (is_member_individual (\@new_individuals, $candidate_individual));
}
sort_best_individuals (\@new_individuals);
$#new_individuals = $populations_count - 1;
@best_individuals = @new_individuals;
}
###############################################################################
# select the best individuals, and if they are better than the existing ones,
# change them:
# This is a way to save the local maxima for all generations in the evolution.
sub select_best_individuals {
if (@best_individuals) {
my @best_individuals_in_this_generation;
foreach my $population (@populations) {
my $individual =
clone_individual (${$population->{'individuals'}}[0]);
push (@best_individuals_in_this_generation, $individual);
}
sort_best_individuals (\@best_individuals_in_this_generation);
merge_individuals (\@best_individuals_in_this_generation);
} else {
# first initialization of @best_individuals;
foreach my $population (@populations) {
my $individual =
clone_individual (${$population->{'individuals'}}[0]);
push (@best_individuals, $individual);
}
sort_best_individuals (\@best_individuals);
}
}
###############################################################################
# calculate cumulative fitness for all populations in a generation:
sub cumulative_fitness {
my $cumulative_fitness = 0;
foreach my $population (@populations) {
$cumulative_fitness += $population->{'total_fitness'};
}
return $cumulative_fitness;
}
###############################################################################
# calculate next generation:
sub next_generation {
breed_all_populations ();
select_best_individuals ();
interbreeding ();
select_best_individuals ();
mutate_all_populations ();
select_best_individuals ();
}
###############################################################################
# decrypt the cypher text, using the individual's description:
sub decrypt_individual {
my ($individual) = @_;
my $permutated_alphabeth = $individual->{'description'};
my $plain_text = $cypher_text;
# decrypt, using the given individual:
$_ = $plain_text;
eval "tr/$permutated_alphabeth/$alphabeth/";
$plain_text = $_;
return $plain_text;
}
###############################################################################
# prints all suggestions about the decription of the cypher text:
sub print_results {
open (RESULTS, '>', "decriptions.txt")
or die "Can't open file! $!";
foreach my $population (@populations) {
foreach my $individual (@{$population->{'individuals'}}) {
my $plain_text = decrypt_individual ($individual);
print RESULTS "$plain_text\n";
}
}
print RESULTS "\nBEST SUGGESTIONS:\n";
foreach my $individual (@best_individuals) {
my $plain_text = decrypt_individual ($individual);
print RESULTS "$plain_text\n";
}
close (RESULTS);
}
###############################################################################
# number of generations that are allowed to be without an improvement:
# on every improvement increment this value!
# on every worsening decrement this value!
my $generations_without_improvement = 10;
###############################################################################
# simulation of the evolution process:
sub evolution {
my $cumulative_fitness = 0;
zero_generation ();
select_best_individuals ();
$cumulative_fitness = cumulative_fitness ();
my $generations = 0;
while ($generations_without_improvement) {
print "GENERATION $generations\t\tFITNESS: $cumulative_fitness\tGENERATIONS LEFT: $generations_without_improvement\n";
$generations ++;
next_generation ();
my $new_cumulative_fitness = cumulative_fitness ();
if ($new_cumulative_fitness > $cumulative_fitness) {
$generations_without_improvement ++;
$cumulative_fitness = $new_cumulative_fitness;
} else {
$generations_without_improvement --;
}
}
print_results ();
}
evolution ();
You can submit a new version of this snippet if you have modified it and you feel it is appropriate to share with others..
|
 |