Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Lazy loaded (and evictable) dictionaries #18

Merged
merged 4 commits into from
Jan 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 20 additions & 12 deletions lib/zxcvbn.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,25 +24,33 @@ class Error < StandardError; end
)

def self.zxcvbn(password, user_inputs = [])
start = (Time.now.to_f * 1000).to_i
matches = Matching.omnimatch(password, user_inputs)
result = Scoring.most_guessable_match_sequence(password, matches)
result["calc_time"] = (Time.now.to_f * 1000).to_i - start
attack_times = TimeEstimates.estimate_attack_times(result["guesses"])
attack_times.each do |prop, val|
result[prop] = val
end
result["feedback"] = Feedback.get_feedback(result["score"], result["sequence"])
result
Tester.new.zxcvbn(password, user_inputs)
end

def self.test(password, user_inputs = [])
Result.new(Zxcvbn.zxcvbn(password, user_inputs))
Tester.new.test(password, user_inputs)
end
Comment on lines 26 to 32
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about making both of these use a shared, memoized Tester instance? That'd still lazy load the first time, but avoid reloading all the time.

To unload / reload on every use, an app could just instantiate a new Tester each time.


class Tester
def matching
@matching ||= Matching.new
end

def test(password, user_inputs = [])
Zxcvbn.test(password, user_inputs)
Result.new(zxcvbn(password, user_inputs))
end

def zxcvbn(password, user_inputs = [])
start = (Time.now.to_f * 1000).to_i
matches = matching.omnimatch(password, user_inputs)
result = Scoring.most_guessable_match_sequence(password, matches)
result["calc_time"] = (Time.now.to_f * 1000).to_i - start
attack_times = TimeEstimates.estimate_attack_times(result["guesses"])
attack_times.each do |prop, val|
result[prop] = val
end
result["feedback"] = Feedback.get_feedback(result["score"], result["sequence"])
result
end
end
end
6 changes: 2 additions & 4 deletions lib/zxcvbn/frequency_lists.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,8 @@ def self.frequency_lists
def self.file_enumerator(filename)
Enumerator.new do |main_enum|
File.open(filename) do |f|
f.each_line do |line|
next if line.nil?

main_enum << line.strip!.freeze
f.each_line(chomp: true) do |line|
main_enum << line.freeze
end
end
end
Expand Down
108 changes: 58 additions & 50 deletions lib/zxcvbn/matching.rb
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# frozen_string_literal: true

module Zxcvbn
module Matching
def self.build_ranked_dict(ordered_list)
class Matching
def build_ranked_dict(ordered_list)
result = {}
# rank starts at 1, not 0
ordered_list.each_with_index do |word, idx|
Expand All @@ -11,35 +11,43 @@ def self.build_ranked_dict(ordered_list)
result
end

RANKED_DICTIONARIES = Zxcvbn.frequency_lists.transform_values do |lst|
build_ranked_dict(lst)
def ranked_dictionaries
@ranked_dictionaries ||= Zxcvbn.frequency_lists.transform_values do |lst|
build_ranked_dict(lst)
end
end

RANKED_DICTIONARIES_MAX_WORD_SIZE = RANKED_DICTIONARIES.transform_values do |word_scores|
word_scores.each_key.max_by(&:size)&.size || 0
def ranked_dictionaries_max_word_size
@ranked_dictionaries_max_word_size ||= ranked_dictionaries.transform_values do |word_scores|
word_scores.each_key.max_by(&:size)&.size || 0
end
end

GRAPHS = {
"qwerty" => ADJACENCY_GRAPHS["qwerty"],
"dvorak" => ADJACENCY_GRAPHS["dvorak"],
"keypad" => ADJACENCY_GRAPHS["keypad"],
"mac_keypad" => ADJACENCY_GRAPHS["mac_keypad"]
}.freeze
def graphs
@graphs ||= {
"qwerty" => ADJACENCY_GRAPHS["qwerty"],
"dvorak" => ADJACENCY_GRAPHS["dvorak"],
"keypad" => ADJACENCY_GRAPHS["keypad"],
"mac_keypad" => ADJACENCY_GRAPHS["mac_keypad"]
}.freeze
end

L33T_TABLE = {
"a" => ["4", "@"],
"b" => ["8"],
"c" => ["(", "{", "[", "<"],
"e" => ["3"],
"g" => ["6", "9"],
"i" => ["1", "!", "|"],
"l" => ["1", "|", "7"],
"o" => ["0"],
"s" => ["$", "5"],
"t" => ["+", "7"],
"x" => ["%"],
"z" => ["2"]
}.freeze
def l33t_table
@l33t_table ||= {
"a" => ["4", "@"],
"b" => ["8"],
"c" => ["(", "{", "[", "<"],
"e" => ["3"],
"g" => ["6", "9"],
"i" => ["1", "!", "|"],
"l" => ["1", "|", "7"],
"o" => ["0"],
"s" => ["$", "5"],
"t" => ["+", "7"],
"x" => ["%"],
"z" => ["2"]
}.freeze
end

REGEXEN = {
# alpha_lower: /[a-z]/,
Expand Down Expand Up @@ -116,25 +124,25 @@ def self.build_ranked_dict(ordered_list)
]
}.freeze

def self.translate(string, chr_map)
def translate(string, chr_map)
string.chars.map { |chr| chr_map[chr] || chr }.join
end

def self.sorted(matches)
def sorted(matches)
# sort on i primary, j secondary
matches.sort_by! { |match| [match["i"], match["j"]] }
end

# ------------------------------------------------------------------------------
# omnimatch -- combine everything ----------------------------------------------
# ------------------------------------------------------------------------------
def self.omnimatch(password, user_inputs = [])
def omnimatch(password, user_inputs = [])
user_dict = build_user_input_dictionary(user_inputs)
matches = []
matches += dictionary_match(password, user_dict, _ranked_dictionaries = RANKED_DICTIONARIES)
matches += reverse_dictionary_match(password, user_dict, _ranked_dictionaries = RANKED_DICTIONARIES)
matches += l33t_match(password, user_dict, _ranked_dictionaries = RANKED_DICTIONARIES, _l33t_table = L33T_TABLE)
matches += spatial_match(password, _graphs = GRAPHS)
matches += dictionary_match(password, user_dict, _ranked_dictionaries = ranked_dictionaries)
matches += reverse_dictionary_match(password, user_dict, _ranked_dictionaries = ranked_dictionaries)
matches += l33t_match(password, user_dict, _ranked_dictionaries = ranked_dictionaries, _l33t_table = l33t_table)
matches += spatial_match(password, _graphs = graphs)
matches += repeat_match(password, user_dict)
matches += sequence_match(password)
matches += regex_match(password, _regexen = REGEXEN)
Expand All @@ -145,7 +153,7 @@ def self.omnimatch(password, user_inputs = [])
#-------------------------------------------------------------------------------
# dictionary match (common passwords, english, last names, etc) ----------------
#-------------------------------------------------------------------------------
def self.dictionary_match(password, user_dict, _ranked_dictionaries = RANKED_DICTIONARIES)
def dictionary_match(password, user_dict, _ranked_dictionaries = ranked_dictionaries)
# _ranked_dictionaries variable is for unit testing purposes
matches = []
_ranked_dictionaries.each do |dictionary_name, ranked_dict|
Expand All @@ -155,10 +163,10 @@ def self.dictionary_match(password, user_dict, _ranked_dictionaries = RANKED_DIC
sorted(matches)
end

def self.check_dictionary(matches, password, dictionary_name, ranked_dict)
def check_dictionary(matches, password, dictionary_name, ranked_dict)
len = password.length
password_lower = password.downcase
longest_word_size = RANKED_DICTIONARIES_MAX_WORD_SIZE.fetch(dictionary_name) do
longest_word_size = ranked_dictionaries_max_word_size.fetch(dictionary_name) do
ranked_dict.each_key.max_by(&:size)&.size || 0
end
search_width = [longest_word_size, len].min
Expand All @@ -184,7 +192,7 @@ def self.check_dictionary(matches, password, dictionary_name, ranked_dict)
end
end

def self.reverse_dictionary_match(password, user_dict, _ranked_dictionaries = RANKED_DICTIONARIES)
def reverse_dictionary_match(password, user_dict, _ranked_dictionaries = ranked_dictionaries)
reversed_password = password.reverse
matches = dictionary_match(reversed_password, user_dict, _ranked_dictionaries)
matches.each do |match|
Expand All @@ -196,7 +204,7 @@ def self.reverse_dictionary_match(password, user_dict, _ranked_dictionaries = RA
sorted(matches)
end

def self.build_user_input_dictionary(user_inputs_or_dict)
def build_user_input_dictionary(user_inputs_or_dict)
# optimization: if we receive a hash, we've been given the dict back (from the repeat matcher)
return user_inputs_or_dict if user_inputs_or_dict.is_a?(Hash)

Expand All @@ -211,7 +219,7 @@ def self.build_user_input_dictionary(user_inputs_or_dict)
# dictionary match with common l33t substitutions ------------------------------
#-------------------------------------------------------------------------------
# makes a pruned copy of l33t_table that only includes password's possible substitutions
def self.relevant_l33t_subtable(password, table)
def relevant_l33t_subtable(password, table)
password_chars = {}
password.chars.each do |chr|
password_chars[chr] = true
Expand All @@ -228,7 +236,7 @@ def self.relevant_l33t_subtable(password, table)
end

# returns the list of possible 1337 replacement dictionaries for a given password
def self.enumerate_l33t_subs(table)
def enumerate_l33t_subs(table)
dedup = lambda do |subs|
deduped = []
members = {}
Expand Down Expand Up @@ -293,7 +301,7 @@ def self.enumerate_l33t_subs(table)
sub_dicts
end

def self.l33t_match(password, user_dict, _ranked_dictionaries = RANKED_DICTIONARIES, _l33t_table = L33T_TABLE)
def l33t_match(password, user_dict, _ranked_dictionaries = ranked_dictionaries, _l33t_table = l33t_table)
matches = []
enumerate_l33t_subs(relevant_l33t_subtable(password, _l33t_table)).each do |sub|
break if sub.empty? # corner case: password has no relevant subs.
Expand Down Expand Up @@ -327,7 +335,7 @@ def self.l33t_match(password, user_dict, _ranked_dictionaries = RANKED_DICTIONAR
# ------------------------------------------------------------------------------
# spatial match (qwerty/dvorak/keypad) -----------------------------------------
# ------------------------------------------------------------------------------
def self.spatial_match(password, _graphs = GRAPHS)
def spatial_match(password, _graphs = graphs)
matches = []
_graphs.each do |graph_name, graph|
matches += spatial_match_helper(password, graph, graph_name)
Expand All @@ -337,7 +345,7 @@ def self.spatial_match(password, _graphs = GRAPHS)

SHIFTED_RX = /[~!@#$%^&*()_+QWERTYUIOP{}|ASDFGHJKL:"ZXCVBNM<>?]/.freeze

def self.spatial_match_helper(password, graph, graph_name)
def spatial_match_helper(password, graph, graph_name)
matches = []
i = 0
while i < password.length - 1
Expand Down Expand Up @@ -409,7 +417,7 @@ def self.spatial_match_helper(password, graph, graph_name)
#-------------------------------------------------------------------------------
# repeats (aaa, abcabcabc) and sequences (abcdef) ------------------------------
#-------------------------------------------------------------------------------
def self.repeat_match(password, user_dict)
def repeat_match(password, user_dict)
matches = []
greedy = /(.+)\1+/
lazy = /(.+?)\1+/
Expand Down Expand Up @@ -462,7 +470,7 @@ def self.repeat_match(password, user_dict)

MAX_DELTA = 5

def self.sequence_match(password)
def sequence_match(password)
# Identifies sequences by looking for repeated differences in unicode codepoint.
# this allows skipping, such as 9753, and also matches some extended unicode sequences
# such as Greek and Cyrillic alphabets.
Expand Down Expand Up @@ -532,7 +540,7 @@ def self.sequence_match(password)
#-------------------------------------------------------------------------------
# regex matching ---------------------------------------------------------------
#-------------------------------------------------------------------------------
def self.regex_match(password, _regexen = REGEXEN)
def regex_match(password, _regexen = REGEXEN)
matches = []
_regexen.each do |name, regex|
# regex.lastIndex = 0; # keeps regex_match stateless
Expand All @@ -556,7 +564,7 @@ def self.regex_match(password, _regexen = REGEXEN)
#-------------------------------------------------------------------------------
# date matching ----------------------------------------------------------------
#-------------------------------------------------------------------------------
def self.date_match(password)
def date_match(password)
# a "date" is recognized as:
# any 3-tuple that starts or ends with a 2- or 4-digit year,
# with 2 or 0 separator chars (1.1.91 or 1191),
Expand Down Expand Up @@ -662,7 +670,7 @@ def self.date_match(password)
end)
end

def self.map_ints_to_dmy(ints)
def map_ints_to_dmy(ints)
# given a 3-tuple, discard if:
# middle int is over 31 (for all dmy formats, years are never allowed in the middle)
# middle int is zero
Expand Down Expand Up @@ -722,7 +730,7 @@ def self.map_ints_to_dmy(ints)
nil
end

def self.map_ints_to_dm(ints)
def map_ints_to_dm(ints)
[ints, ints.reverse].each do |(d, m)|
if (d >= 1 && d <= 31) && (m >= 1 && m <= 12)
return {
Expand All @@ -734,7 +742,7 @@ def self.map_ints_to_dm(ints)
nil
end

def self.two_to_four_digit_year(year)
def two_to_four_digit_year(year)
if year > 99
year
elsif year > 50
Expand Down
Loading