Initial commit

oahrens · Dec 31, 2017 · 8192c08 · 8192c08
commit 8192c08
Show file tree

Hide file tree

Showing 21 changed files with 122,042 additions and 0 deletions.
diff --git a/.idea/Randomness_Testing.iml b/.idea/Randomness_Testing.iml
diff --git a/.idea/libraries/R_User_Library.xml b/.idea/libraries/R_User_Library.xml
diff --git a/.idea/misc.xml b/.idea/misc.xml
diff --git a/.idea/modules.xml b/.idea/modules.xml
diff --git a/ApproximateEntropy.py b/ApproximateEntropy.py
@@ -0,0 +1,67 @@
+from math import log as log
+from numpy import zeros as zeros
+from scipy.special import gammaincc as gammaincc
+
+class ApproximateEntropy:
+
+    @staticmethod
+    def approximate_entropy_test(binary_data:str, verbose=False, pattern_length=10):
+        """
+        from the NIST documentation http://csrc.nist.gov/publications/nistpubs/800-22-rev1a/SP800-22rev1a.pdf
+
+        As with the Serial test of Section 2.11, the focus of this test is the frequency of all possible
+        overlapping m-bit patterns across the entire sequence. The purpose of the test is to compare
+        the frequency of overlapping blocks of two consecutive/adjacent lengths (m and m+1) against the
+        expected result for a random sequence.
+
+        :param      binary_data:        a binary string
+        :param      verbose             True to display the debug message, False to turn off debug message
+        :param      pattern_length:     the length of the pattern (m)
+        :return:    ((p_value1, bool), (p_value2, bool)) A tuple which contain the p_value and result of serial_test(True or False)
+        """
+        length_of_binary_data = len(binary_data)
+
+        # Augment the n-bit sequence to create n overlapping m-bit sequences by appending m-1 bits
+        # from the beginning of the sequence to the end of the sequence.
+        # NOTE: documentation says m-1 bits but that doesnt make sense, or work.
+        binary_data += binary_data[:pattern_length + 1:]
+
+        # Get max length one patterns for m, m-1, m-2
+        max_pattern = ''
+        for i in range(pattern_length + 2):
+            max_pattern += '1'
+
+        # Keep track of each pattern's frequency (how often it appears)
+        vobs_01 = zeros(int(max_pattern[0:pattern_length:], 2) + 1)
+        vobs_02 = zeros(int(max_pattern[0:pattern_length + 1:], 2) + 1)
+
+        for i in range(length_of_binary_data):
+            # Work out what pattern is observed
+            vobs_01[int(binary_data[i:i + pattern_length:], 2)] += 1
+            vobs_02[int(binary_data[i:i + pattern_length + 1:], 2)] += 1
+
+        # Calculate the test statistics and p values
+        vobs = [vobs_01, vobs_02]
+
+        sums = zeros(2)
+        for i in range(2):
+            for j in range(len(vobs[i])):
+                if vobs[i][j] > 0:
+                    sums[i] += vobs[i][j] * log(vobs[i][j] / length_of_binary_data)
+        sums /= length_of_binary_data
+        ape = sums[0] - sums[1]
+
+        xObs = 2.0 * length_of_binary_data * (log(2) - ape)
+
+        p_value = gammaincc(pow(2, pattern_length - 1), xObs / 2.0)
+
+        if verbose:
+            print('Approximate Entropy Test DEBUG BEGIN:')
+            print("\tLength of input:\t\t\t", length_of_binary_data)
+            print('\tLength of each block:\t\t', pattern_length)
+            print('\tApEn(m):\t\t\t\t\t', ape)
+            print('\txObs:\t\t\t\t\t\t', xObs)
+            print('\tP-Value:\t\t\t\t\t', p_value)
+            print('DEBUG END.')
+
+        return (p_value, (p_value >= 0.01))
diff --git a/BinaryMatrix.py b/BinaryMatrix.py
@@ -0,0 +1,124 @@
+from copy import copy as copy
+
+class BinaryMatrix:
+
+    def __init__(self, matrix, rows, cols):
+        """
+        This class contains the algorithm specified in the NIST suite for computing the **binary rank** of a matrix.
+        :param matrix: the matrix we want to compute the rank for
+        :param rows: the number of rows
+        :param cols: the number of columns
+        :return: a BinaryMatrix object
+        """
+        self.M = rows
+        self.Q = cols
+        self.A = matrix
+        self.m = min(rows, cols)
+
+    def compute_rank(self, verbose=False):
+        """
+        This method computes the binary rank of self.matrix
+        :param verbose: if this is true it prints out the matrix after the forward elimination and backward elimination
+        operations on the rows. This was used to testing the method to check it is working as expected.
+        :return: the rank of the matrix.
+        """
+        if verbose:
+            print("Original Matrix\n", self.A)
+
+        i = 0
+        while i < self.m - 1:
+            if self.A[i][i] == 1:
+                self.perform_row_operations(i, True)
+            else:
+                found = self.find_unit_element_swap(i, True)
+                if found == 1:
+                    self.perform_row_operations(i, True)
+            i += 1
+
+        if verbose:
+            print("Intermediate Matrix\n", self.A)
+
+        i = self.m - 1
+        while i > 0:
+            if self.A[i][i] == 1:
+                self.perform_row_operations(i, False)
+            else:
+                if self.find_unit_element_swap(i, False) == 1:
+                    self.perform_row_operations(i, False)
+            i -= 1
+
+        if verbose:
+            print("Final Matrix\n", self.A)
+
+        return self.determine_rank()
+
+    def perform_row_operations(self, i, forward_elimination):
+        """
+        This method performs the elementary row operations. This involves xor'ing up to two rows together depending on
+        whether or not certain elements in the matrix contain 1's if the "current" element does not.
+        :param i: the current index we are are looking at
+        :param forward_elimination: True or False.
+        """
+        if forward_elimination:
+            j = i + 1
+            while j < self.M:
+                if self.A[j][i] == 1:
+                    self.A[j, :] = (self.A[j, :] + self.A[i, :]) % 2
+                j += 1
+        else:
+            j = i - 1
+            while j >= 0:
+                if self.A[j][i] == 1:
+                    self.A[j, :] = (self.A[j, :] + self.A[i, :]) % 2
+                j -= 1
+
+    def find_unit_element_swap(self, i, forward_elimination):
+        """
+        This given an index which does not contain a 1 this searches through the rows below the index to see which rows
+        contain 1's, if they do then they swapped. This is done on the forward and backward elimination
+        :param i: the current index we are looking at
+        :param forward_elimination: True or False.
+        """
+        row_op = 0
+        if forward_elimination:
+            index = i + 1
+            while index < self.M and self.A[index][i] == 0:
+                index += 1
+            if index < self.M:
+                row_op = self.swap_rows(i, index)
+        else:
+            index = i - 1
+            while index >= 0 and self.A[index][i] == 0:
+                index -= 1
+            if index >= 0:
+                row_op = self.swap_rows(i, index)
+        return row_op
+
+    def swap_rows(self, i, ix):
+        """
+        This method just swaps two rows in a matrix. Had to use the copy package to ensure no memory leakage
+        :param i: the first row we want to swap and
+        :param ix: the row we want to swap it with
+        :return: 1
+        """
+        temp = copy(self.A[i, :])
+        self.A[i, :] = self.A[ix, :]
+        self.A[ix, :] = temp
+        return 1
+
+    def determine_rank(self):
+        """
+        This method determines the rank of the transformed matrix
+        :return: the rank of the transformed matrix
+        """
+        rank = self.m
+        i = 0
+        while i < self.M:
+            all_zeros = 1
+            for j in range(self.Q):
+                if self.A[i][j] == 1:
+                    all_zeros = 0
+            if all_zeros == 1:
+                rank -= 1
+            i += 1
+        return rank
diff --git a/Complexity.py b/Complexity.py
@@ -0,0 +1,115 @@
+from copy import copy as copy
+from numpy import dot as dot
+from numpy import histogram as histogram
+from numpy import zeros as zeros
+from scipy.special import gammaincc as gammaincc
+
+class ComplexityTest:
+
+    @staticmethod
+    def linear_complexity_test(binary_data:str, verbose=False, block_size=500):
+        """
+        Note that this description is taken from the NIST documentation [1]
+        [1] http://csrc.nist.gov/publications/nistpubs/800-22-rev1a/SP800-22rev1a.pdf
+        The focus of this test is the length of a linear feedback shift register (LFSR). The purpose of this test is to
+        determine whether or not the sequence is complex enough to be considered random. Random sequences are
+        characterized by longer LFSRs. An LFSR that is too short implies non-randomness.
+
+        :param      binary_data:    a binary string
+        :param      verbose         True to display the debug messgae, False to turn off debug message
+        :param      block_size:     Size of the block
+        :return:    (p_value, bool) A tuple which contain the p_value and result of frequency_test(True or False)
+
+        """
+
+        length_of_binary_data = len(binary_data)
+
+        # The number of degrees of freedom;
+        # K = 6 has been hard coded into the test.
+        degree_of_freedom = 6
+
+        #  π0 = 0.010417, π1 = 0.03125, π2 = 0.125, π3 = 0.5, π4 = 0.25, π5 = 0.0625, π6 = 0.020833
+        #  are the probabilities computed by the equations in Section 3.10
+        pi = [0.01047, 0.03125, 0.125, 0.5, 0.25, 0.0625, 0.020833]
+
+        t2 = (block_size / 3.0 + 2.0 / 9) / 2 ** block_size
+        mean = 0.5 * block_size + (1.0 / 36) * (9 + (-1) ** (block_size + 1)) - t2
+
+        number_of_block = int(length_of_binary_data / block_size)
+
+        if number_of_block > 1:
+            block_end = block_size
+            block_start = 0
+            blocks = []
+            for i in range(number_of_block):
+                blocks.append(binary_data[block_start:block_end])
+                block_start += block_size
+                block_end += block_size
+
+            complexities = []
+            for block in blocks:
+                complexities.append(ComplexityTest.berlekamp_massey_algorithm(block))
+
+            t = ([-1.0 * (((-1) ** block_size) * (chunk - mean) + 2.0 / 9) for chunk in complexities])
+            vg = histogram(t, bins=[-9999999999, -2.5, -1.5, -0.5, 0.5, 1.5, 2.5, 9999999999])[0][::-1]
+            im = ([((vg[ii] - number_of_block * pi[ii]) ** 2) / (number_of_block * pi[ii]) for ii in range(7)])
+
+            xObs = 0.0
+            for i in range(len(pi)):
+                xObs += im[i]
+
+            # P-Value = igamc(K/2, xObs/2)
+            p_value = gammaincc(degree_of_freedom / 2.0, xObs / 2.0)
+
+            if verbose:
+                print('Linear Complexity Test DEBUG BEGIN:')
+                print("\tLength of input:\t", length_of_binary_data)
+                print('\tLength in bits of a block:\t', )
+                print("\tDegree of Freedom:\t\t", degree_of_freedom)
+                print('\tNumber of Blocks:\t', number_of_block)
+                print('\tValue of Vs:\t\t', vg)
+                print('\txObs:\t\t\t\t', xObs)
+                print('\tP-Value:\t\t\t', p_value)
+                print('DEBUG END.')
+
+
+            return (p_value, (p_value >= 0.01))
+        else:
+            return (-1.0, False)
+
+    @staticmethod
+    def berlekamp_massey_algorithm(block_data):
+        """
+        An implementation of the Berlekamp Massey Algorithm. Taken from Wikipedia [1]
+        [1] - https://en.wikipedia.org/wiki/Berlekamp-Massey_algorithm
+        The Berlekamp–Massey algorithm is an algorithm that will find the shortest linear feedback shift register (LFSR)
+        for a given binary output sequence. The algorithm will also find the minimal polynomial of a linearly recurrent
+        sequence in an arbitrary field. The field requirement means that the Berlekamp–Massey algorithm requires all
+        non-zero elements to have a multiplicative inverse.
+        :param block_data:
+        :return:
+        """
+        n = len(block_data)
+        c = zeros(n)
+        b = zeros(n)
+        c[0], b[0] = 1, 1
+        l, m, i = 0, -1, 0
+        int_data = [int(el) for el in block_data]
+        while i < n:
+            v = int_data[(i - l):i]
+            v = v[::-1]
+            cc = c[1:l + 1]
+            d = (int_data[i] + dot(v, cc)) % 2
+            if d == 1:
+                temp = copy(c)
+                p = zeros(n)
+                for j in range(0, l):
+                    if b[j] == 1:
+                        p[j + i - m] = 1
+                c = (c + p) % 2
+                if l <= 0.5 * i:
+                    l = i + 1 - l
+                    m = i
+                    b = temp
+            i += 1
+        return l