From f8a5b05c118c5888bd0cb19b1cb426eb26df121f Mon Sep 17 00:00:00 2001
From: David Barda
Date: Sun, 6 Nov 2016 16:56:50 +0200
Subject: [PATCH] Changed some comments and added a lazy evaluation example function

---
 EllipsoidSpec.hs       | 10 ++++---
 KNNSpec.hs             |  2 +-
 Label.hs               | 10 +++++++
 Learner.hs             |  7 +++++
 Loss.hs                |  3 ++
 OnlineLearner.hs       |  2 +-
 Project Specifications | 68 +++++++++++++++++++++++-------------------
 VectorUtils.hs         |  2 ++
 Writer.hs              | 22 ++++++++++++--
 9 files changed, 87 insertions(+), 39 deletions(-)

diff --git a/EllipsoidSpec.hs b/EllipsoidSpec.hs
index 9916595..6fc940e 100644
--- a/EllipsoidSpec.hs
+++ b/EllipsoidSpec.hs
@@ -1,5 +1,5 @@
 module EllipsoidSpec where
--- This module that contains tests for the ellipsoid learner
+-- This module contains tests for the ellipsoid learner
 
 import Label
 import Writer
@@ -8,9 +8,10 @@ import Test.Hspec
 import OnlineLearner
 import Data.Matrix
 
--- The Ellipsoid knowledge contains fractions, like eta, a and w. This can cause a problem when comparing two ellipsoids
--- which should contain the same values, but because of a different rounding don't. This function that rounds all the
--- fractions in the knowledge to N digits to enable comparison.
+-- The Ellipsoid knowledge contains fractions, like eta, a and w. This can cause
+-- a problem when comparing two ellipsoids which should contain the same values,
+-- but because of a different rounding appear to be different.
+-- This function rounds all the fractions in the knowledge to N digits to enable comparison.
 roundKnowledgeToNDigits :: Int -> TrainingKnowledge -> TrainingKnowledge
 roundKnowledgeToNDigits digits (EllipsoidKnowledge d eta' a' w') = EllipsoidKnowledge d roundedEta roundedA roundedW
   where
@@ -75,6 +76,7 @@ main = do
   describe "Batch tests" $ do
     it "Train on one example" $
       roundAndCompare batchTrainKnowledge train1Knowledge
+
     it "Train on a lot examples" $ do
       let a' = fromList 2 2 [0.65843621, -0.39506173, -0.39506173, 0.55308642]
 
diff --git a/KNNSpec.hs b/KNNSpec.hs
index 2a50d67..4481043 100644
--- a/KNNSpec.hs
+++ b/KNNSpec.hs
@@ -1,5 +1,5 @@
 module KNNSpec where
--- This module that contains tests for the KNN learner
+-- This module contains tests for the KNN learner
 
 import Test.Hspec
 import Learner
diff --git a/Label.hs b/Label.hs
index 7ae5cf3..78f7c25 100644
--- a/Label.hs
+++ b/Label.hs
@@ -1,3 +1,13 @@
+-- This module represents the possible labels that an example can have.
+-- We chose Double and Int because these are the most basic labels possible -
+-- A double can be used for most regression problems (up to a specific precision or scale).
+-- An int can be used for most classification problems.
+-- We've implemented this module with extensibility in mind.
+-- We implemented the LabelType data in order to be able to check the type of a
+-- specific label - is it a Double, or an Int?
+-- This is important for the KNN learner, which can function as either a regression
+-- learner or a classifier.
+
 module Label(Label(..), LabelType(..), labelToDouble, labelToInt) where
 
 -- LabelType data type represents the type of the labels that can be returned by
diff --git a/Learner.hs b/Learner.hs
index e5a7cca..07a45c5 100644
--- a/Learner.hs
+++ b/Learner.hs
@@ -1,3 +1,5 @@
+-- A module that bundles all the offline learners; at the moment it contains
+-- both classification and regression KNN as examples.
 module Learner(TrainingKnowledge, LearningParameters(..), LearnerParameters(..), train, classify,
                Learner.error) where
 
@@ -31,9 +33,14 @@ train classifier@(LearningParameters (KNN _ _) _) xs ys = KNNKnowledge xs ys c
 
 -- A function that is used to classify a new example using the "knowledge" obtained by the train proccess.
 classify :: TrainingKnowledge -> ExampleType -> Label
+
+-- classify for KNN regression
+-- Returns the average of the labels of the k closest neighbors in the training set
 classify knowledge@(KNNKnowledge _ _ (LearningParameters knn DoubleType)) toClassify =
   LDouble (sum (map labelToDouble $ labelOfClosestNeighbors knowledge toClassify) / fromIntegral (k knn))
 
+-- classify for KNN classification
+-- Returns the label that appears most often among the k closest neighbors in the training set
 classify knowledge@(KNNKnowledge _ _ (LearningParameters _ IntType)) toClassify = LInt result
   where
     result = snd $ maximumBy (comparing fst) $ zip (map length labelesGroupedByValue) labelsValues
diff --git a/Loss.hs b/Loss.hs
index ed41d2c..2e77b7e 100644
--- a/Loss.hs
+++ b/Loss.hs
@@ -1,3 +1,6 @@
+-- This module houses the loss functions used in our various learners.
+-- More loss functions can be added to it easily.
+
 module Loss(binaryLoss, quadriaticLoss, Loss) where
 
 import Label
diff --git a/OnlineLearner.hs b/OnlineLearner.hs
index 50e2ffc..54c77bd 100644
--- a/OnlineLearner.hs
+++ b/OnlineLearner.hs
@@ -6,7 +6,7 @@ import Writer
 import Loss
 import Data.Matrix
 
--- Example represents the examples that online learners can receive
+-- Example represents an example that online learners can receive
 type Example = Matrix Double
 
 -- LearnerParameters data type represents the specific parameters for each learner.
diff --git a/Project Specifications b/Project Specifications
index 19e757e..93e7672 100644
--- a/Project Specifications
+++ b/Project Specifications
@@ -1,46 +1,54 @@
 Specification
 ------------------
-EllipsoidSpec.hs:
-KNNSpec.hs:
-Label.hs:
-Learner.hs:
-Loss.hs:
-OnlineLearner.hs:
-Project Speficiation: this file.
-VectorUtils.hs:
-Writer.hs:
-
-lazy evaluation - To be checked
-Pattern matching - Multiple functions, Learner.hs error i.e
-higher order functions - Multiple functions, Learner.hs error i.e
-new types - Multiple places, KNNKnowledge in OnlineLearner.hs i.e
-derived - Multiple places, Label derived Show in Label.hs (also instance of Ord and Eq)
-lambda functions - Where DiffList instantiate Monoid at Writer.hs.
-applicative functor - Writer.hs Monad in Writer.hs (instance of applicative as it is a monad).
-monoids - DiffList in Writer.hs instantiate Monoid.
+
+Files
+------------------
+EllipsoidSpec.hs
+KNNSpec.hs
+Label.hs
+Learner.hs
+Loss.hs
+OnlineLearner.hs
+Project Specifications
+VectorUtils.hs
+Writer.hs
+
+Idioms we used
+------------------
+Pattern matching - classify in Learner.hs, Eq in Label.hs, etc.
+Higher order functions - error in Learner.hs; `mappend` between DiffLists
+ creates a new function in the resulting DiffList (a composition of functions), etc.
+New types - LearningParameters, TrainingKnowledge in Learner.hs, etc.
+Derived - Label derives Show in Label.hs (it is also an instance of Ord and Eq).
+Lambda functions - We use lambdas where DiffList instantiates Monoid in Writer.hs (line 37).
+Applicative functor - the Writer monad in Writer.hs (an instance of Applicative, as it is a Monad).
+Monoids - DiffList in Writer.hs instantiates Monoid.
+Lazy evaluation - takeFromDiffList in Writer.hs; printing a DiffList is lazy, etc.
 Functionality Pros
 ------------------
-1) High order functions make the ability to get functions that get as many
+1) Writing functional code is like writing math!
+
+2) Higher order functions make it possible to write functions that take as many
 parameters as needed and returns function without complicated function pointers.
-2) It was easy to write the tests because each function can be isolated
+3) It was easy to write the tests because each function can be isolated
 as it has its specific arguments and return value without any relation to a
-class or globals.
+class or global variables.
-3) It was easier to debug as the functions are stateless and
+4) It was easier to debug as the functions are stateless and
 no members or in function variables could be a reason for bugs.
+5) Code is usually much shorter.
+
 Functionality Cons
 ------------------
-1) Its hard to predict the spaced used in the program (Due to lazyness 4 can actually be 2+2 which take more bits to represent).
-2) Its hard to compare the running time in relation to a given imperative code(Most of existing code is written by imperative languages).
-3) As imperative programmers its hard to learn and master(Due to the differences between the methodology).
+1) It's hard to predict the space used by the program (due to laziness, 4 can
+actually be an unevaluated 2+2, which takes more bits to represent).
+2) It's hard to compare the running time against equivalent imperative code
+(most of the existing code is written in imperative languages).
-Remaining Tasks(Temp)
-------------------
-Add error to OnlineLearner
-Refactor Learner?
-Finish Project Specification
+3) As imperative programmers, it's hard to learn and master (due to the differences
+between imperative and functional languages).
diff --git a/VectorUtils.hs b/VectorUtils.hs
index 919edd8..51bf873 100644
--- a/VectorUtils.hs
+++ b/VectorUtils.hs
@@ -1,3 +1,5 @@
+-- Various utilities used throughout our project.
+
 module VectorUtils(distanceFromVector, lNorm, Norm) where
 
 -- Norm is a type that represents a norm function
diff --git a/Writer.hs b/Writer.hs
index f20e360..25b9e03 100644
--- a/Writer.hs
+++ b/Writer.hs
@@ -1,4 +1,8 @@
-module Writer (Writer(..), DiffList(..), tell, toDiffList, fromDiffList, getValFromWriter, showIndent) where
+-- A Writer module, capable of chaining values and logs in a "monadic" way.
+
+module Writer (Writer(..), DiffList(..), tell, toDiffList, fromDiffList,
+ getValFromWriter, showIndent, takeFromDiffList) where
+
 import Control.Monad (liftM, ap)
 import Data.String.Utils
 
@@ -10,13 +14,21 @@ newtype Writer w a = Writer { runWriter :: (a, w) }
 
 -- https://wiki.haskell.org/Difference_list
 newtype DiffList a = DiffList { getDiffList :: [a] -> [a] }
 
+-- Every Monad is an Applicative, which means every Monad is also a Functor.
+-- liftM is fmap implemented with return and (>>=), and thus lets us derive
+-- the Functor instance from the Monad instance
 instance (Monoid w) => Functor (Writer w) where
   fmap = liftM
 
+-- Since GHC 7.10, Applicative is defined as a superclass of Monad
 instance (Monoid w) => Applicative (Writer w) where
   pure = return
   (<*>) = ap
 
+--------------------------------------------------------------------------------
+-- return should wrap a value x in our Writer monad
+-- (Writer (x, v)) >>= f should apply f to x, getting a value y, append the
+-- new log v' to v, and return Writer (y, v `mappend` v')
 instance (Monoid w) => Monad (Writer w) where
   return x = Writer (x, mempty)
   (Writer (x, v)) >>= f = let (Writer (y, v')) = f x in Writer (y, v `mappend` v')
 
@@ -26,11 +38,11 @@ instance Monoid (DiffList a) where
   (DiffList f) `mappend` (DiffList g) = DiffList (f.g)
 
 -- A function that is used to create a writer with "dummy" values so a simple text
--- will be writter to the log.
+-- will be written to the log.
 tell :: w -> Writer w ()
 tell x = Writer ((), x)
 
--- A function that is used to get the resulted value from the writer.
+-- A function that is used to get the result value from the writer.
 getValFromWriter :: Writer w a -> a
 getValFromWriter (Writer (a, _)) = a
 
@@ -46,3 +58,7 @@ fromDiffList (DiffList f) = f []
 -- useful to make the logs prettier
 showIndent :: (Show a) => a -> String
 showIndent obj = "\t" ++ replace "\n" "\n\t" (show obj)
+
+-- Takes the first n items from a DiffList.
+takeFromDiffList :: Int -> DiffList a -> [a]
+takeFromDiffList n diffList = take n (fromDiffList diffList)
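
Usage example (sketch)
------------------
A minimal sketch of how the pieces touched by this patch fit together; it is not
part of the patch itself. It assumes the Writer module above compiles as-is
(GHC 7.10-era Functor/Applicative/Monad/Monoid instances) and that toDiffList has
the usual [a] -> DiffList a type; logNumber, multWithLog and main are hypothetical
names used only for illustration. Because the log is a DiffList, `mappend` only
composes functions, and the new takeFromDiffList reads a prefix of the log lazily.

-- Assumes Writer.hs from this patch is on the module path.
import Writer

-- Log one number; the log entry is wrapped in a DiffList of Strings.
logNumber :: Int -> Writer (DiffList String) Int
logNumber n = do
  tell (toDiffList ["Got number: " ++ show n])
  return n

-- Chain two logged computations; their DiffList logs are mappend-ed,
-- i.e. composed as functions rather than concatenated eagerly.
multWithLog :: Writer (DiffList String) Int
multWithLog = do
  a <- logNumber 3
  b <- logNumber 5
  tell (toDiffList ["Multiplying " ++ show a ++ " and " ++ show b])
  return (a * b)

main :: IO ()
main = do
  let (result, logDiff) = runWriter multWithLog
  print result                               -- 15
  -- Lazy evaluation: only the first log entry is forced here;
  -- the rest of the log list is never fully built.
  mapM_ putStrLn (takeFromDiffList 1 logDiff)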