-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathsds.m
executable file
·77 lines (65 loc) · 2.2 KB
/
sds.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
%SDS
%
% [TARGET_DATA, OUTLIER_DATA] = SDS(X,K,THRESHOLD)
%
% INPUT
% X Dataset
% K Number of nearest neighbors ( default = ceil(5 * log10(size(x, 1))) )
% THRESHOLD Decision threshold (default = 0.1)
%
% OUTPUT
% TARGET_DATA Pseudo targets generated by shifting target data along the positive direction of the data density gradient
% OUTLIER_DATA Pseudo outliers generated by shifting edge patterns along the negative direction of the data density gradient
%
%
% DESCRIPTION
%
% Generate binary data to tune a one class classifier according to the following paper:
% Wang, S. et al. 2018. Hyperparameter selection of one-class support vector machine by self-adaptive data
% shifting. Pattern Recognition. 74, 2018.
function [target_data, outlier_data] = sds(x, k, threshold)
% Self-adaptive data shifting: build pseudo targets and pseudo outliers
% from a one-class dataset.
%
%   x          dataset; only its target-class objects are used
%   k          number of nearest neighbours (default ceil(5*log10(size(x,1)))),
%              clamped to [1, n-1] where n is the number of target objects
%   threshold  a pattern is an edge pattern when the fraction of neighbours
%              with theta_ij >= 0 is at least 1 - threshold (default 0.1)
%
%   target_data   result of gendatoc() on the shifted target patterns
%   outlier_data  result of gendatoc([], .) on the shifted edge patterns
%
% Raises an error when x is missing/empty, when fewer than two target
% objects remain, or when no edge pattern is found.

% Validate x first: the default for k below needs size(x, 1).
if nargin < 1 || isempty(x)
    error('Incorrect call to SDS: Empty dataset');
end
if nargin < 3 || isempty(threshold), threshold = 0.1; end
if nargin < 2 || isempty(k), k = ceil(5 * log10(size(x, 1))); end

%make sure we have a OneClass dataset
x = +target_class(x);
[n, d] = size(x);
if n < 2
    error('SDS:tooFewObjects', ...
        'Incorrect call to SDS: at least two target objects are required');
end
% Each pattern has at most n-1 neighbours besides itself; without this
% clamp idx(i, 2:k+1) runs out of bounds (the default k does so for n < 5).
k = max(1, min(k, n - 1));

%calculate the Euclidean distance matrix
% max(., 0) guards against tiny negative squared distances from round-off,
% which would otherwise turn into complex numbers under sqrt.
distmat = sqrt(max(sqeucldistm(x, x), 0));
%sort the distances; column 1 is taken to be the pattern itself
[sD, idx] = sort(distmat, 2);

% Preallocated per-pattern results; the masks select the rows actually used.
is_edge = false(n, 1);
normals = zeros(n, d);
has_target = false(n, 1);
shifted = zeros(n, d);
l_ns = 0;

%finding the edge patterns
for i = 1:n
    nbrs = idx(i, 2:k+1);
    % vectors pointing from each neighbour towards pattern i
    diffs = bsxfun(@minus, x(i,:), x(nbrs,:));
    len = sqrt(sum(diffs .^ 2, 2));

    % Normalise to unit length. A duplicate of pattern i gives a zero-length
    % vector; leave it as zero instead of dividing by zero (NaN).
    unit_vecs = zeros(k, d);
    nz = len > 0;
    unit_vecs(nz,:) = bsxfun(@rdivide, diffs(nz,:), len(nz));

    % approximate normal vector: sum of the unit vectors
    n_i = sum(unit_vecs, 1);
    n_len = norm(n_i);
    if n_len < eps
        % Neighbours cancel out exactly: there is no shift direction, and
        % normalising would give NaN (and wrongly flag an edge, since every
        % theta_ij would be 0). Skip the pattern.
        continue;
    end

    theta_ij = sum(bsxfun(@times, unit_vecs, n_i), 2);
    l_i = sum(theta_ij >= 0) / k;
    n_i = n_i / n_len;

    if l_i >= 1 - threshold
        %add new edge
        is_edge(i) = true;
        normals(i,:) = n_i;
        % accumulate the mean distance to the k nearest neighbours
        l_ns = l_ns + sum(sD(i, 2:k+1)) / k;
    end

    %generate pseudo targets
    % Projection of (x_j - x_i) onto -n_i, which equals diffs . n_i.
    lambda_i = sum(bsxfun(@times, diffs, n_i), 2);
    positive = lambda_i(lambda_i > 0);
    if ~isempty(positive)
        %shift along positive direction of data density gradient (-n_i)
        has_target(i) = true;
        shifted(i,:) = x(i,:) - min(positive) * n_i;
    end
end

if ~any(is_edge)
    % Without edges the shift length is 0/0 and there is nothing to shift.
    error('SDS:noEdgePatterns', ...
        'SDS: no edge patterns found; try a larger threshold or a different k');
end
% shift length: average k-NN distance over all edge patterns
l_ns = l_ns / sum(is_edge);

%generate pseudo outliers by shifting along negative direction
outlier_data = x(is_edge,:) + normals(is_edge,:) * l_ns;

if any(has_target)
    target_data = shifted(has_target,:);
else
    target_data = [];
end
target_data = gendatoc(target_data);
outlier_data = gendatoc([], outlier_data);