diff --git a/py/htm/encoders/eye.py b/py/htm/encoders/eye.py index 2fff41f23c..ce9e759aad 100644 --- a/py/htm/encoders/eye.py +++ b/py/htm/encoders/eye.py @@ -158,7 +158,7 @@ def __init__(self, input_shape, num_samples, sparsity, def encode(self, img): """Returns a dense boolean np.ndarray.""" - assert(img.shape == self.input_shape) + assert(img.shape == self.input_shape),print("Channel: img must have same dims as input_shape:", img.shape, self.input_shape) assert(img.dtype == self.dtype) if self.wrap: img += self.offsets @@ -177,6 +177,11 @@ class Eye: Simulates functionality of eye's retinal parvocellular(P-cells), and magnocellular(M-cells) pathways, at the saccadic steps. + Based on OpenCV's cv2.bioinspired.Retina model: + https://docs.opencv.org/3.4/d2/d94/bioinspired_retina.html + http://web.iitd.ac.in/~sumeet/Modelling_Vision.pdf + + On high level, magno cells: - detect change in temporal information in the image, ie motion @@ -218,7 +223,7 @@ class Eye: def __init__(self, - output_diameter = 200, # output SDR size is diameter^2 + output_diameter = 200, # fovea image size, also approximately output SDR size (= diameter^2) sparsityParvo = 0.2, sparsityMagno = 0.025, color = True,): @@ -238,45 +243,44 @@ def __init__(self, motion detection and motion tracking, video processing. For details see @param `sparsityParvo`. TODO: output of M-cells should be processed on a fast TM. - Argument color: use color vision (requires P-cells > 0), default true. + Argument color: use color vision (requires P-cells > 0), default true. (Grayscale is faster) """ self.output_diameter = output_diameter # Argument resolution_factor is used to expand the sensor array so that # the fovea has adequate resolution. After log-polar transform image # is reduced by this factor back to the output_diameter. - self.resolution_factor = 3 - self.retina_diameter = int(self.resolution_factor * output_diameter) - # Argument fovea_scale ... represents "zoom" aka distance from the object/image. 
- self.fovea_scale = 0.177 + resolution_factor = 2 assert(output_diameter // 2 * 2 == output_diameter) # Diameter must be an even number. - assert(self.retina_diameter // 2 * 2 == self.retina_diameter) # (Resolution Factor X Diameter) must be an even number. assert(sparsityParvo >= 0 and sparsityParvo <= 1.0) - if sparsityParvo > 0: - assert(sparsityParvo * (self.retina_diameter **2) > 0) self.sparsityParvo = sparsityParvo assert(sparsityMagno >= 0 and sparsityMagno <= 1.0) - if sparsityMagno > 0: - assert(sparsityMagno * (self.retina_diameter **2) > 0) self.sparsityMagno = sparsityMagno if color is True: assert(sparsityParvo > 0) self.color = color - self.retina = cv2.bioinspired.Retina_create( - inputSize = (self.retina_diameter, self.retina_diameter), + inputSize = (resolution_factor*output_diameter, resolution_factor*output_diameter), colorMode = color, - colorSamplingMethod = cv2.bioinspired.RETINA_COLOR_BAYER,) + colorSamplingMethod = cv2.bioinspired.RETINA_COLOR_BAYER, + useRetinaLogSampling = True, + reductionFactor = 1.2, #!reductionFactor_, # how much is the image under-sampled #TODO tune these params + samplingStrenght = 4.0, # how much are the corners blured/forgotten + ) + + # Activate Parvo/Magno vision based on whether sparsityXXX is set. + self.retina.activateContoursProcessing(sparsityParvo > 0) # Parvo + self.retina.activateMovingContoursProcessing(sparsityMagno > 0) # Magno print(self.retina.printSetup()) print() if sparsityParvo > 0: - dims = (output_diameter, output_diameter) + dims = self.retina.getOutputSize() sparsityP_ = sparsityParvo - if color is True: - dims = (output_diameter, output_diameter, 3,) #3 for RGB color channels + if color is True: + dims = dims +(3,) #append 3rd dim with value '3' for RGB color channels # The reason the parvo-cellular has `3rd-root of the sparsity` is that there are three color channels (RGB), # each of which is encoded separately and then combined. 
The color channels are combined with a logical AND, @@ -295,7 +299,7 @@ def __init__(self, if sparsityMagno > 0: self.magno_enc = ChannelEncoder( - input_shape = (output_diameter, output_diameter), + input_shape = self.retina.getOutputSize(), num_samples = 1, sparsity = sparsityMagno, dtype=np.uint8, drange=[0, 255],) @@ -303,69 +307,80 @@ def __init__(self, self.magno_enc = None # output variables: - self.image = None # the current input RGB image + self.image = np.zeros(self.retina.getInputSize()) # the current input RGB image self.roi = None # self.image cropped to region of interest - self.parvo_img = None # output visualization of parvo/magno cells - self.magno_img = None - self.parvo_sdr = SDR((output_diameter, output_diameter,)) # parvo/magno cellular representation (SDR) - self.magno_sdr = SDR((output_diameter, output_diameter,)) - - - def new_image(self, image): + #self.retina.getParvo() # output visualization of parvo/magno cells + #self.retina.getMagno() + self.parvo_sdr = SDR(self.retina.getOutputSize()) # parvo/magno cellular representation (SDR) + self.magno_sdr = SDR(self.retina.getOutputSize()) + + # Motor-control variables (to be set by user): + self.orientation = 0 #in degrees + self.position = (0,0) + self.scale = 1.0 + + + def new_image_(image): """ Argument image ... If String, will load image from file path. If numpy.ndarray, will attempt to cast to correct data type and dimensions. + + Return: the new image ndarray (only useful if string is passed in) """ # Load image if needed. if isinstance(image, str): - self.image = cv2.imread(image) - self.image = cv2.cvtColor(self.image, cv2.COLOR_BGR2RGB) + image = cv2.imread(image) + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) else: - self.image = image + image = image # Get the image into the right format. 
- assert(isinstance(self.image, np.ndarray)) - if self.image.dtype != np.uint8: - raise TypeError('Image "%s" dtype is not unsigned 8 bit integer, image.dtype is %s.'%( - self.image.dtype)) + assert(isinstance(image, np.ndarray)) + assert(image.dtype == np.uint8), print( + 'Image "%s" dtype is not unsigned 8 bit integer, image.dtype is %s.'%(image.dtype)) # Ensure there are three color channels. - if len(self.image.shape) == 2 or self.image.shape[2] == 1: - self.image = np.dstack([self.image] * 3) + if len(image.shape) == 2 or image.shape[2] == 1: + image = np.dstack([image] * 3) # Drop the alpha channel if present. - elif self.image.shape[2] == 4: - self.image = self.image[:,:,:3] + elif image.shape[2] == 4: + image = image[:,:,:3] # Sanity checks. - assert(len(self.image.shape) == 3) - assert(self.image.shape[2] == 3) # Color images only. - self.reset() - self.center_view() + assert(len(image.shape) == 3) + assert(image.shape[2] == 3) # Color images only. + return image + def center_view(self): """Center the view over the image""" self.orientation = 0 self.position = (self.image.shape[0]/2., self.image.shape[1]/2.) 
- self.scale = np.min(np.divide(self.image.shape[:2], self.retina_diameter)) + self.scale = np.min(np.divide(self.image.shape[:2], self.retina.getInputSize()[0])) def randomize_view(self, scale_range=None): """Set the eye's view point to a random location""" if scale_range is None: - scale_range = [2, min(self.image.shape[:2]) / self.retina_diameter] + scale_range = [2, min(self.image.shape[:2]) / self.retina.getInputSize()[0]] assert(len(scale_range) == 2) self.orientation = random.uniform(0, 2 * math.pi) self.scale = random.uniform(min(scale_range), max(scale_range)) - roi_radius = self.scale * self.retina_diameter / 2 + roi_radius = self.scale * self.retina.getInputSize()[0] / 2 self.position = [random.uniform(roi_radius, dim - roi_radius) for dim in self.image.shape[:2]] + def rotate_(self, image, angle): + image_center = tuple(np.array(image.shape[1::-1]) / 2) + rot_mat = cv2.getRotationMatrix2D(image_center, angle, 1.0) + result = cv2.warpAffine(image, rot_mat, image.shape[1::-1], flags=cv2.INTER_LINEAR) + return result + + def _crop_roi(self): """ Crop to Region Of Interest (ROI) which contains the whole field of view. Adds a black circular boarder to mask out areas which the eye can't see. - Note: size of the ROI is (eye.output_diameter * eye.resolution_factor). - Note: the circular boarder is actually a bit too far out, playing with - eye.fovea_scale can hide areas which this ROI image will show. + Note: size of the ROI is eye.retina.getInputSize(), i.e. (resolution_factor * output_diameter) per side. Arguments: eye.scale, eye.position, eye.image @@ -376,7 +391,7 @@ def _crop_roi(self): """ assert(self.image is not None) - r = int(round(self.scale * self.retina_diameter / 2)) + r = int(round(self.scale * self.retina.getInputSize()[0] / 2)) x, y = self.position x = int(round(x)) y = int(round(y)) @@ -403,18 +418,16 @@ def _crop_roi(self): roi[x_offset:x_offset+x_shape, y_offset:y_offset+y_shape] = image_slice # Rescale the ROI to remove the scaling effect. 
- roi.resize( (self.retina_diameter, self.retina_diameter, 3)) + inDims_ = self.retina.getInputSize() + inDims_ = inDims_ + (3,) #add 3rd dim '3' for RGB + roi.resize( inDims_ ) - # Mask out areas the eye can't see by drawing a circle boarder. - center = int(roi.shape[0] / 2) - circle_mask = np.zeros(roi.shape, dtype=np.uint8) - cv2.circle(circle_mask, (center, center), center, thickness = -1, color=(255,255,255)) - roi = np.minimum(roi, circle_mask) return roi - def compute(self, position=None, rotation=None, scale=None): + def compute(self, image, position=None, rotation=None, scale=None): """ + Argument image - string (to load) or numpy.ndarray with image data Arguments position, rotation, scale: optional, if not None, the self.xxx is overriden with the provided value. Returns tuple (SDR parvo, SDR magno) @@ -427,55 +440,49 @@ def compute(self, position=None, rotation=None, scale=None): if scale is not None: self.scale=scale - # apply field of view (FOV) + # apply field of view (FOV) & rotation + self.image = Eye.new_image_(image) #TODO remove the FOV, already done in retina's logPolar transform + self.image = self.rotate_(self.image, rotation) self.roi = self._crop_roi() # Retina image transforms (Parvo & Magnocellular). self.retina.run(self.roi) + if self.parvo_enc is not None: parvo = self.retina.getParvo() if self.magno_enc is not None: magno = self.retina.getMagno() # Log Polar Transform. 
- center = self.retina_diameter / 2 - M = self.retina_diameter * self.fovea_scale + center = self.retina.getInputSize()[0] / 2 + M = self.retina.getInputSize()[0] * self.scale if self.parvo_enc is not None: parvo = cv2.logPolar(parvo, center = (center, center), M = M, flags = cv2.WARP_FILL_OUTLIERS) - parvo = cv2.resize(parvo, dsize=(self.output_diameter, self.output_diameter), interpolation = cv2.INTER_CUBIC) if self.magno_enc is not None: magno = cv2.logPolar(magno, center = (center, center), M = M, flags = cv2.WARP_FILL_OUTLIERS) - magno = cv2.resize(magno, dsize=(self.output_diameter, self.output_diameter), interpolation = cv2.INTER_CUBIC) - # Apply rotation by rolling the images around axis 1. - rotation = self.output_diameter * self.orientation / (2 * math.pi) - rotation = int(round(rotation)) - if self.parvo_enc is not None: - self.parvo_img = np.roll(parvo, rotation, axis=0) - if self.magno_enc is not None: - self.magno_img = np.roll(magno, rotation, axis=0) # Encode images into SDRs. - p = [] - m = [] if self.parvo_enc is not None: p = self.parvo_enc.encode(parvo) if self.color: pr, pg, pb = np.dsplit(p, 3) p = np.logical_and(np.logical_and(pr, pg), pb) p = np.expand_dims(np.squeeze(p), axis=2) + self.parvo_sdr.dense = p.flatten() if self.magno_enc is not None: m = self.magno_enc.encode(magno) + self.magno_sdr.dense = m.flatten() + - self.magno_sdr.dense = m.flatten() - self.parvo_sdr.dense = p.flatten() + self.parvo_img = parvo assert(len(self.magno_sdr.sparse) > 0) assert(len(self.parvo_sdr.sparse) > 0) @@ -493,13 +500,6 @@ def make_roi_pretty(self, roi=None): if roi is None: roi = self.roi - # Show the ROI, first rotate it like the eye is rotated. - angle = self.orientation * 360 / (2 * math.pi) - roi = self.roi[:,:,::-1] - rows, cols, color_depth = roi.shape - M = cv2.getRotationMatrix2D((cols / 2, rows / 2), angle, 1) - roi = cv2.warpAffine(roi, M, (cols,rows)) - # Invert 5 pixels in the center to show where the fovea is located. 
center = int(roi.shape[0] / 2) roi[center, center] = np.full(3, 255) - roi[center, center] @@ -507,6 +507,12 @@ def make_roi_pretty(self, roi=None): roi[center-2, center+2] = np.full(3, 255) - roi[center-2, center+2] roi[center-2, center-2] = np.full(3, 255) - roi[center-2, center-2] roi[center+2, center-2] = np.full(3, 255) - roi[center+2, center-2] + + # Mask out areas the eye can't see by drawing a circle border. + center = int(roi.shape[0] / 2) + circle_mask = np.zeros(roi.shape, dtype=np.uint8) + cv2.circle(circle_mask, (center, center), center, thickness = -1, color=(255,255,255)) + roi = np.minimum(roi, circle_mask) return roi @@ -514,13 +520,13 @@ def plot(self, window_name='Eye', delay=1000): roi = self.make_roi_pretty() cv2.imshow('Region Of Interest', roi) if self.color: - cv2.imshow('Parvocellular', self.parvo_img[:,:,::-1]) + cv2.imshow('Parvocellular', self.retina.getParvo()[:,:,::-1]) else: - cv2.imshow('Parvocellular', self.parvo_img) - cv2.imshow('Magnocellular', self.magno_img) - idx = self.parvo_sdr.dense.astype(np.uint8).reshape(self.output_diameter, self.output_diameter)*255 + cv2.imshow('Parvocellular', self.retina.getParvo()) + cv2.imshow('Magnocellular', self.retina.getMagno()) + idx = self.parvo_sdr.dense.astype(np.uint8).reshape(self.retina.getOutputSize())*255 cv2.imshow('Parvo SDR', idx) - idx = self.magno_sdr.dense.astype(np.uint8).reshape(self.output_diameter, self.output_diameter)*255 + idx = self.magno_sdr.dense.astype(np.uint8).reshape(self.retina.getOutputSize())*255 cv2.imshow('Magno SDR', idx) cv2.waitKey(delay) @@ -529,7 +535,7 @@ def small_random_movement(self): """returns small difference in position, rotation, scale. This is naive "saccadic" movements. 
""" - max_change_angle = (2*3.14159) / 500 + max_change_angle = (2*math.pi) / 100 self.position = ( self.position[0] + random.gauss(1, .75), self.position[1] + random.gauss(1, .75),) @@ -581,16 +587,18 @@ def _get_images(path): print('No images found at file path "%s"!'%args.IMAGE) else: eye = Eye() + for img_path in images: eye.reset() print("Loading image %s"%img_path) - eye.new_image(img_path) - eye.scale = 1 - eye.center_view() + #eye.center_view() + #manually set position to look at head: + eye.position = (400, 400) for i in range(10): pos,rot,sc = eye.small_random_movement() - (sdrParvo, sdrMagno) = eye.compute(pos,rot,sc) #TODO derive from Encoder - eye.plot(500) + sc = 1.0 #FIXME scaling with any other than 1.0 breaks plots + (sdrParvo, sdrMagno) = eye.compute(img_path, pos,rot,sc) #TODO derive from Encoder + eye.plot(delay=1500) print("Sparsity parvo: {}".format(len(eye.parvo_sdr.sparse)/np.product(eye.parvo_sdr.dimensions))) print("Sparsity magno: {}".format(len(eye.magno_sdr.sparse)/np.product(eye.magno_sdr.dimensions))) print("All images seen.")