From f8a5b05c118c5888bd0cb19b1cb426eb26df121f Mon Sep 17 00:00:00 2001
From: David Barda
Date: Sun, 6 Nov 2016 16:56:50 +0200
Subject: [PATCH] Changed some comments and added a lazy evaluation example function

---
 EllipsoidSpec.hs       | 10 ++++---
 KNNSpec.hs             |  2 +-
 Label.hs               | 10 +++++++
 Learner.hs             |  7 +++++
 Loss.hs                |  3 ++
 OnlineLearner.hs       |  2 +-
 Project Specifications | 68 +++++++++++++++++++++++-------------------
 VectorUtils.hs         |  2 ++
 Writer.hs              | 22 ++++++++++++--
 9 files changed, 87 insertions(+), 39 deletions(-)

diff --git a/EllipsoidSpec.hs b/EllipsoidSpec.hs
index 9916595..6fc940e 100644
--- a/EllipsoidSpec.hs
+++ b/EllipsoidSpec.hs
@@ -1,5 +1,5 @@
 module EllipsoidSpec where
--- This module that contains tests for the ellipsoid learner
+-- This module contains tests for the ellipsoid learner
 
 import Label
 import Writer
@@ -8,9 +8,10 @@ import Test.Hspec
 import OnlineLearner
 import Data.Matrix
 
--- The Ellipsoid knowledge contains fractions, like eta, a and w. This can cause a problem when comparing two ellipsoids
--- which should contain the same values, but because of a different rounding don't. This function that rounds all the
--- fractions in the knowledge to N digits to enable comparison.
+-- The Ellipsoid knowledge contains fractions, like eta, a and w. This can cause
+-- a problem when comparing two ellipsoids which should contain the same values,
+-- but because of a different rounding appear to be different.
+-- This function rounds all the fractions in the knowledge to N digits to enable comparison.
 roundKnowledgeToNDigits :: Int -> TrainingKnowledge -> TrainingKnowledge
 roundKnowledgeToNDigits digits (EllipsoidKnowledge d eta' a' w') = EllipsoidKnowledge d roundedEta roundedA roundedW
   where
@@ -75,6 +76,7 @@ main = do
   describe "Batch tests" $ do
     it "Train on one example" $
       roundAndCompare batchTrainKnowledge train1Knowledge
+
     it "Train on a lot examples" $ do
       let a' = fromList 2 2 [0.65843621, -0.39506173, -0.39506173, 0.55308642]
 
diff --git a/KNNSpec.hs b/KNNSpec.hs
index 2a50d67..4481043 100644
--- a/KNNSpec.hs
+++ b/KNNSpec.hs
@@ -1,5 +1,5 @@
 module KNNSpec where
--- This module that contains tests for the KNN learner
+-- This module contains tests for the KNN learner
 
 import Test.Hspec
 import Learner
diff --git a/Label.hs b/Label.hs
index 7ae5cf3..78f7c25 100644
--- a/Label.hs
+++ b/Label.hs
@@ -1,3 +1,13 @@
+-- This module represents the possible labels that an example can have.
+-- We chose Double and Int because these are the most basic labels possible -
+-- A double can be used for most regression problems (up to a specific precision or scale).
+-- An int can be used for most classification problems.
+-- We've implemented this module with extensibility in mind.
+-- We implemented the LabelType data in order to be able to check the type of a
+-- specific label - is it a Double, or an Int?
+-- This is important for the KNN learner, which can function as either a regression
+-- learner or a classifier.
+
 module Label(Label(..), LabelType(..), labelToDouble, labelToInt) where
 
 -- LabelType data type represents the type of the labels that can be returned by
diff --git a/Learner.hs b/Learner.hs
index e5a7cca..07a45c5 100644
--- a/Learner.hs
+++ b/Learner.hs
@@ -1,3 +1,5 @@
+-- A module that bundles all the offline learners; at the moment it contains
+-- both classification and regression KNN as examples.
 module Learner(TrainingKnowledge, LearningParameters(..), LearnerParameters(..), train, classify,
                Learner.error) where
 
@@ -31,9 +33,14 @@ train classifier@(LearningParameters (KNN _ _) _) xs ys = KNNKnowledge xs ys c
 
 -- A function that is used to classify a new example using the "knowledge" obtained by the train proccess.
 classify :: TrainingKnowledge -> ExampleType -> Label
+
+-- classify for KNN regression
+-- Returns the average of the labels of the k closest neighbors in the training set
 classify knowledge@(KNNKnowledge _ _ (LearningParameters knn DoubleType)) toClassify =
   LDouble (sum (map labelToDouble $ labelOfClosestNeighbors knowledge toClassify) / fromIntegral (k knn))
 
+-- classify for KNN classification
+-- Returns the label that appears most often among the k closest neighbors in the training set
 classify knowledge@(KNNKnowledge _ _ (LearningParameters _ IntType)) toClassify = LInt result
   where
     result = snd $ maximumBy (comparing fst) $ zip (map length labelesGroupedByValue) labelsValues
diff --git a/Loss.hs b/Loss.hs
index ed41d2c..2e77b7e 100644
--- a/Loss.hs
+++ b/Loss.hs
@@ -1,3 +1,6 @@
+-- This module houses the loss functions used in our various learners.
+-- More loss functions can be added to it easily.
+
 module Loss(binaryLoss, quadriaticLoss, Loss) where
 
 import Label
diff --git a/OnlineLearner.hs b/OnlineLearner.hs
index 50e2ffc..54c77bd 100644
--- a/OnlineLearner.hs
+++ b/OnlineLearner.hs
@@ -6,7 +6,7 @@ import Writer
 import Loss
 import Data.Matrix
 
--- Example represents the examples that online learners can receive
+-- Example represents an example that online learners can receive
 type Example = Matrix Double
 
 -- LearnerParameters data type represents the specific parameters for each learner.
diff --git a/Project Specifications b/Project Specifications
index 19e757e..93e7672 100644
--- a/Project Specifications
+++ b/Project Specifications
@@ -1,46 +1,54 @@
 Specification
 ------------------
-EllipsoidSpec.hs:
-KNNSpec.hs:
-Label.hs:
-Learner.hs:
-Loss.hs:
-OnlineLearner.hs:
-Project Speficiation: this file.
-VectorUtils.hs:
-Writer.hs:
-
-lazy evaluation - To be checked
-Pattern matching - Multiple functions, Learner.hs error i.e
-higher order functions - Multiple functions, Learner.hs error i.e
-new types - Multiple places, KNNKnowledge in OnlineLearner.hs i.e
-derived - Multiple places, Label derived Show in Label.hs (also instance of Ord and Eq)
-lambda functions - Where DiffList instantiate Monoid at Writer.hs.
-applicative functor - Writer.hs Monad in Writer.hs (instance of applicative as it is a monad).
-monoids - DiffList in Writer.hs instantiate Monoid.
+
+Files
+------------------
+EllipsoidSpec.hs
+KNNSpec.hs
+Label.hs
+Learner.hs
+Loss.hs
+OnlineLearner.hs
+Project Specifications
+VectorUtils.hs
+Writer.hs
+
+Idioms we used
+------------------
+Pattern matching - classify in Learner.hs, Eq in Label.hs, etc.
+Higher order functions - error in Learner.hs; `mappend` between DiffLists
+ creates a new function in the resulting DiffList (a composition of functions), etc.
+New types - LearningParameters, TrainingKnowledge in Learner.hs, etc.
+Derived - Label derives Show in Label.hs (it is also an instance of Ord and Eq).
+Lambda functions - We use lambdas where DiffList instantiates Monoid in Writer.hs (line 37).
+Applicative functor - the Writer monad in Writer.hs (an instance of Applicative, as it is a Monad).
+Monoids - DiffList in Writer.hs instantiates Monoid.
+Lazy evaluation - takeFromDiffList in Writer.hs; printing a DiffList is lazy, etc.
 Functionality Pros
 ------------------
-1) High order functions make the ability to get functions that get as many
+1) Writing functional code is like writing math!
+
+2) Higher order functions make it possible to write functions that take as many
 parameters as needed and returns function without complicated function pointers.
-2) It was easy to write the tests because each function can be isolated
+3) It was easy to write the tests because each function can be isolated
 as it has its specific arguments and return value without any relation to a
-class or globals.
+class or global variables.
-3) It was easier to debug as the functions are stateless and
+4) It was easier to debug as the functions are stateless and
 no members or in function variables could be a reason for bugs.
+5) Code is usually much shorter.
+
 Functionality Cons
 ------------------
-1) Its hard to predict the spaced used in the program (Due to lazyness 4 can actually be 2+2 which take more bits to represent).
-2) Its hard to compare the running time in relation to a given imperative code(Most of existing code is written by imperative languages).
-3) As imperative programmers its hard to learn and master(Due to the differences between the methodology).
+1) It's hard to predict the space used by the program (due to laziness, 4 can
+actually be an unevaluated 2+2, which takes more bits to represent).
+2) It's hard to compare the running time against equivalent imperative code
+(most of the existing code is written in imperative languages).
-Remaining Tasks(Temp)
-------------------
-Add error to OnlineLearner
-Refactor Learner?
-Finish Project Specification
+3) As imperative programmers, it's hard to learn and master (due to the differences
+between imperative and functional languages).
diff --git a/VectorUtils.hs b/VectorUtils.hs
index 919edd8..51bf873 100644
--- a/VectorUtils.hs
+++ b/VectorUtils.hs
@@ -1,3 +1,5 @@
+-- Various utilities used throughout our project.
+
 module VectorUtils(distanceFromVector, lNorm, Norm) where
 
 -- Norm is a type that represents a norm function
diff --git a/Writer.hs b/Writer.hs
index f20e360..25b9e03 100644
--- a/Writer.hs
+++ b/Writer.hs
@@ -1,4 +1,8 @@
-module Writer (Writer(..), DiffList(..), tell, toDiffList, fromDiffList, getValFromWriter, showIndent) where
+-- A Writer module, capable of chaining values and logs in a "monadic" way.
+
+module Writer (Writer(..), DiffList(..), tell, toDiffList, fromDiffList,
+ getValFromWriter, showIndent, takeFromDiffList) where
+
 import Control.Monad (liftM, ap)
 import Data.String.Utils
 
@@ -10,13 +14,21 @@ newtype Writer w a = Writer { runWriter :: (a, w) }
 
 -- https://wiki.haskell.org/Difference_list
 newtype DiffList a = DiffList { getDiffList :: [a] -> [a] }
 
+-- Every Monad is an Applicative, which means every Monad is also a Functor.
+-- liftM is fmap implemented with return and (>>=), and thus lets us derive
+-- the Functor instance from the Monad instance
 instance (Monoid w) => Functor (Writer w) where
   fmap = liftM
 
+-- Since GHC 7.10, Applicative is defined as a superclass of Monad
 instance (Monoid w) => Applicative (Writer w) where
   pure = return
   (<*>) = ap
 
+--------------------------------------------------------------------------------
+-- return should wrap a value x in our Writer monad
+-- (Writer (x, v)) >>= f should apply f to x, getting a value y, append the
+-- new log v' to v, and return Writer (y, v `mappend` v')
 instance (Monoid w) => Monad (Writer w) where
   return x = Writer (x, mempty)
   (Writer (x, v)) >>= f = let (Writer (y, v')) = f x in Writer (y, v `mappend` v')
 
@@ -26,11 +38,11 @@ instance Monoid (DiffList a) where
   (DiffList f) `mappend` (DiffList g) = DiffList (f.g)
 
 -- A function that is used to create a writer with "dummy" values so a simple text
--- will be writter to the log.
+-- will be written to the log.
 tell :: w -> Writer w ()
 tell x = Writer ((), x)
 
--- A function that is used to get the resulted value from the writer.
+-- A function that is used to get the result value from the writer.
 getValFromWriter :: Writer w a -> a
 getValFromWriter (Writer (a, _)) = a
 
@@ -46,3 +58,7 @@ fromDiffList (DiffList f) = f []
 -- useful to make the logs prettier
 showIndent :: (Show a) => a -> String
 showIndent obj = "\t" ++ replace "\n" "\n\t" (show obj)
+
+-- Takes the first n items from a DiffList.
+takeFromDiffList :: Int -> DiffList a -> [a]
+takeFromDiffList n diffList = take n (fromDiffList diffList)
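
Usage example (sketch)
------------------
A minimal sketch of how the pieces touched by this patch fit together; it is not
part of the patch itself. It assumes the Writer module above compiles as-is
(GHC 7.10-era Functor/Applicative/Monad/Monoid instances) and that toDiffList has
the usual [a] -> DiffList a type; logNumber, multWithLog and main are hypothetical
names used only for illustration. Because the log is a DiffList, `mappend` only
composes functions, and the new takeFromDiffList reads a prefix of the log lazily.

-- Assumes Writer.hs from this patch is on the module path.
import Writer

-- Log one number; the log entry is wrapped in a DiffList of Strings.
logNumber :: Int -> Writer (DiffList String) Int
logNumber n = do
  tell (toDiffList ["Got number: " ++ show n])
  return n

-- Chain two logged computations; their DiffList logs are mappend-ed,
-- i.e. composed as functions rather than concatenated eagerly.
multWithLog :: Writer (DiffList String) Int
multWithLog = do
  a <- logNumber 3
  b <- logNumber 5
  tell (toDiffList ["Multiplying " ++ show a ++ " and " ++ show b])
  return (a * b)

main :: IO ()
main = do
  let (result, logDiff) = runWriter multWithLog
  print result                               -- 15
  -- Lazy evaluation: only the first log entry is forced here;
  -- the rest of the log list is never fully built.
  mapM_ putStrLn (takeFromDiffList 1 logDiff)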