# Copyright (C) 2006 Mike Howard - All Rights Reserved
# Copyright (C) yyyy name of author
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

require 'pp'

# Accumulates word statistics for the text files named on the command line.
#
# * +occurances+             - word => number of times the word was registered
# * +word_length_occurances+ - length => number of registered words (with
#                              repeats) of that length
# * +word_length_count+      - length => number of *distinct* words of that length
#
# All three hashes default to 0 for keys that were never seen.
class Stats
  attr_reader :occurances, :word_length_occurances, :word_length_count

  def initialize
    @occurances = Hash.new(0)
    @word_length_occurances = Hash.new(0)
    @word_length_count = Hash.new(0)
    @count = 0
  end

  # Records one occurrence of +word+ (a String) in every table.
  def register(word)
    @count += 1
    # A word contributes to the distinct-word table only the first time it is seen.
    @word_length_count[word.length] += 1 unless @occurances.key?(word)
    @occurances[word] += 1
    @word_length_occurances[word.length] += 1
  end

  # Rebuilds +word_length_count+ from scratch out of the keys of +occurances+.
  # Returns the rebuilt hash.
  def count_word_lengths
    @word_length_count = @occurances.keys.each_with_object(Hash.new(0)) do |word, counts|
      counts[word.length] += 1
    end
  end

  # Prints the scan totals followed by a table giving, for each word length,
  # the percentage and number of distinct words of that length.
  def prt
    count_word_lengths
    distinct_word_count = @occurances.keys.length
    printf("distinct_word_count %d\n", distinct_word_count)
    printf("Total of %d words scanned\n", @count)
    printf("Distinct Words Table\n")
    printf("Total number of Distinct Words: %d\n", distinct_word_count)
    printf("Len Percent Count\n")
    @word_length_count.keys.sort.each do |n|
      # The loop body only runs when at least one distinct word exists,
      # so the divisor is never zero here.
      fraction = @word_length_count[n].to_f / distinct_word_count
      printf("%3d %10.2f %10d\n", n, fraction * 100.0, @word_length_count[n])
    end
  end

  # Prints every registered word with its occurrence count, in sorted word order.
  def prt_occurances
    @occurances.keys.sort.each do |word|
      puts "#{word}: #{@occurances[word]}"
    end
  end
end

# A token counts as a word when it is purely alphabetic, optionally preceded
# by one quote character and optionally followed by one punctuation/quote
# character.
word_re = /\A["']?([a-zA-Z]+)[.!?'"]?\z/

stats = Stats.new

ARGV.each do |fname|
  # File.foreach closes the file once iteration finishes.
  File.foreach(fname) do |line|
    line.split.each do |token|
      match_obj = word_re.match(token)
      stats.register(match_obj[1].downcase) if match_obj
    end
  end
end

# Prints an estimate of how many passwords of total length +pw_len+ can be
# built from the scanned words plus between +min_digits+ and +max_digits+
# digits.
#
# stats      - object whose +word_length_count+ maps a word length to the
#              number of distinct words of that length
# pw_len     - overall password length (letters plus digits)
# min_digits - smallest number of digits to consider
# max_digits - largest number of digits to consider
#
# Extra per-digit-count detail is printed when the global $verbose is truthy.
# Returns [caseless_count, caseful_count].
def passwd_count(stats, pw_len, min_digits, max_digits)
  printf("computing number expected passwords of overall len %d containing %d to %d digits\n",
         pw_len, min_digits, max_digits)
  # Lengths with no words count as 0 rather than nil.
  length_counts = stats.word_length_count || {}
  caseful_count = 0
  caseless_count = 0
  min_digits.upto(max_digits) do |digits|
    n = pw_len - digits
    next if n <= 0

    # compute number of possible base passwords - all lower case
    # which can be constructed from words or concatenations of
    # words two characters or longer
    n_count = length_counts.fetch(n, 0)
    2.upto(n / 2) do |m|
      n_count += length_counts.fetch(m, 0) * length_counts.fetch(n - m, 0)
    end
    printf("Base words of length %d = %d\n", n, n_count) if $verbose

    # compute multiplier for digits
    # NOTE(review): this range yields digits + 1 factors, not digits; it looks
    # like an off-by-one but is preserved as-is - confirm the intended model.
    multiplier = 1
    (n + 1).upto(n + 1 + digits) { |m| multiplier *= m * 10 }

    caseless_count += n_count * multiplier
    caseful_count += 2**n * multiplier * n_count
    if $verbose
      printf("Possible Passwords of length %d containing %d digits:\n %g [base: %g, caseless multiplier: %g caseful multilplier: %g]\n",
             n + digits, digits, n_count * multiplier, n_count, multiplier, multiplier * 2**n)
    end
  end
  printf(" Caseless: %g\n", caseless_count)
  printf(" Caseful: %g\n", caseful_count)
  [caseless_count, caseful_count]
end

stats.prt

$verbose = false

4.upto(12) do |len|
  passwd_count(stats, len, 2, 2)
  passwd_count(stats, len, 3, 3)
  passwd_count(stats, len, 4, 4)
end