-
Notifications
You must be signed in to change notification settings - Fork 6
/
MLFRScgForClass_w3.m
374 lines (329 loc) · 13.8 KB
/
MLFRScgForClass_w3.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
% [feature_slct,feature_dependfirst,feature_dependlast,TTpes]=fuzzy_pes(data,target,over);
function [bestacc,besto,bestg,feature,cg,X,Y] = MLFRScgForClass_w3(train_data,train_target,test_data,test_target,omin,omax,gmin,gmax,ostep,gstep)
%MLFRScgForClass_w3 grid-searches the (o,g) parameter pair for fuzzy-rough
% feature selection followed by ML-KNN classification.
%
% For every grid point (o,g) it runs fuzzy_w_imp to select a feature subset,
% trains ML-KNN (10 neighbors, Smooth=1) on the selected columns, and records
% the test-set average precision in cg(i,j).
%
% Inputs (all search parameters optional):
%   train_data/test_data     - instance-per-row feature matrices
%   train_target/test_target - QxM label matrices with +1/-1 entries
%   omin,omax,ostep          - range/step for the stopping threshold o
%   gmin,gmax,gstep          - range/step for the kernel width g
% Outputs:
%   bestacc        - best accuracy found on the grid
%   besto, bestg   - (o,g) achieving bestacc (smallest o wins ties)
%   feature        - cell grid of selected feature index vectors
%   cg             - accuracy at every grid point
%   X, Y           - meshgrid of o and g values
%
% Adapted from SVMcg by faruto (LIBSVM-farutoUltimateVersion, 2009);
% see Chih-Chung Chang and Chih-Jen Lin, LIBSVM,
% http://www.csie.ntu.edu.tw/~cjlin/libsvm
%% default search-grid parameters
if nargin < 8
    ostep = 0.0002;
    gstep = 0.006;
end
if nargin < 7
    gmax = 2;
    gmin = 0.001;
end
if nargin < 5
    % BUGFIX: the original assigned omax = 0.0001 and omin = 0.0005 (swapped),
    % which made omax:-ostep:omin an empty range, so the search loop never ran
    % and `feature` was returned undefined.
    omax = 0.0005;
    omin = 0.0001;
end
%% X: o values (descending), Y: g values, cg: accuracy at each grid point
[X,Y] = meshgrid(omax:-ostep:omin,gmin:gstep:gmax);
[m,n] = size(X);
cg = zeros(m,n);
feature = cell(m,n);   % preallocate the per-grid-point feature subsets
%% record acc for every (o,g) and keep the best with the smallest o
besto = 0;
bestg = 0.1;
bestacc = 0;
for i = 1:m
    for j = 1:n
        cmd = [ X(i,j) , Y(i,j) ];
        % feature selection with stopping threshold o and kernel width g
        feature_slct = fuzzy_w_imp(train_data,train_target,cmd);
        feature{i,j} = feature_slct;
        [Prior,PriorN,Cond,CondN] = MLKNN_train(train_data(:,feature_slct),train_target,10,1);
        cg(i,j) = MLKNN_test(train_data(:,feature_slct),train_target,test_data(:,feature_slct),test_target,10,Prior,PriorN,Cond,CondN);
        % BUGFIX: best-(o,g) tracking was commented out in the original, so
        % bestacc/besto/bestg were always returned as their initial placeholders.
        if cg(i,j) > bestacc
            bestacc = cg(i,j);
            besto = X(i,j);
            bestg = Y(i,j);
        end
        % on an accuracy tie, prefer the smaller o
        if abs( cg(i,j)-bestacc )<=eps && besto > X(i,j)
            bestacc = cg(i,j);
            besto = X(i,j);
            bestg = Y(i,j);
        end
    end
end
function [feature_slct,feature_dependfirst,feature_dependlast]=fuzzy_w_imp(data,target,cmd)
%FUZZY_W_IMP greedy forward feature selection driven by a fuzzy dependency
% measure (original Chinese comment: "optimistic algorithm").
%
% Inputs:
%   data   - n x m feature matrix (per original comment: the normalized
%            feature space)
%   target - label matrix with entries +1 / -1 (transposed below to n x label)
%   cmd    - cmd(1) = over, the minimum per-round dependency improvement
%            required to keep selecting; cmd(2) = k, the Gaussian-kernel
%            width used in disM/disH
% Outputs:
%   feature_slct        - indices of the selected features, in selection order
%   feature_dependfirst - dependency value of every candidate in round one
%   feature_dependlast  - dependency values in the final (terminating) round
%                         NOTE(review): only assigned when the loop reaches a
%                         termination branch; callers in this file use just
%                         the first output.
% tic
% profile on
%st=cputime;
over=cmd(1);
k=cmd(2);
target=target';% now n x label
feature_slct=[];% selected features
[n,m]=size(data);
[n,label]=size(target);
feature_lft=1:m;% features still available for selection
num_cur=0;% number of features selected so far
stand=0;% best dependency of the previous round
TTimp=cell(1,1);% per-round improvement history (kept for inspection, not returned)
while num_cur<m
max_depend=0;
feature_depend=zeros(1,length(feature_lft));
% evaluate each remaining feature appended to the current selection
for j=1:length(feature_lft)
store=zeros(n,label);
feature=[feature_slct feature_lft(j)];
data0=data(:,[feature]);
mm=size(data0,2);
for L=1:label
X=find(target(:,L)==1);% indices of samples positive for label L
Y=find(target(:,L)~=1);% indices of samples negative for label L
x=data0(X,:);% data of the positive samples for label L
y=data0(Y,:);% data of the negative samples for label L
% Gaussian-kernel similarities: since 2*x*y' - ||x||^2 - ||y||^2 = -||x-y||^2,
% disM(i,j) = exp(-||x_i - y_j||^2 / k) (positive vs. negative class) and
% disH is the same quantity within the positive class.
% (sqrt(sum(.,2).^2) = |sum(.,2)| = sum(.,2), as the row sums are nonnegative.)
disM=exp((2 * x * y' - repmat(sqrt(sum(x .^ 2, 2) .^ 2), 1, size(y, 1)) - repmat(sqrt(sum(y .^ 2, 2)' .^ 2), size(x, 1), 1)) / k);
disH=exp((2 * x * x' - repmat(sqrt(sum(x .^ 2, 2) .^ 2), 1, size(x, 1)) - repmat(sqrt(sum(x .^ 2, 2)' .^ 2), size(x, 1), 1)) / k);
% dis=kernel(x,y,'linear');
% or=ones(length(X),length(Y));
% kernel_matrix=or-dis;
%min_hete=zeros(length(X),label);
for i=1:length(X)
% margin of the ith positive sample under the candidate feature set:
% closest "miss" similarity minus farthest "hit" similarity
temp=max(disM(i,:))-min(disH(i,:));
% if temp<store(X(i),L)
store(X(i),L)=temp;
% end
end
end
% average the per-label margins over labels, then over samples
seq=zeros(n,1);
for p=1:n
seq(p,1)=sum(store(p,:))/label;
end
feature_depend(1,j)=sum(seq)/n;
if feature_depend(1,j)>max_depend
max_depend=feature_depend(1,j);
max_feature=j;
end
end
%%%%%%%%%%%%% pessimistic variant: take the minimum (original note) %%%%%%%%%%%%%
%%%%%%%%%%%%% check the termination condition %%%%%%%%%%%%%%
terminal=max_depend-stand;% improvement of this round over the previous one
TTimp{1}=[TTimp{1},terminal];
if num_cur~=0
if terminal<over% termination: improvement fell below the threshold
feature_dependlast=feature_depend;
% NOTE(review): this inner num_cur==0 test is unreachable (we are in the
% num_cur~=0 branch); kept byte-identical to the original.
if num_cur==0
feature_dependfirst=feature_dependlast;
end
break;
else
feature_slct=[feature_slct feature_lft(max_feature)];% accept the best feature
feature_lft(max_feature)=[];% remove it from the remaining pool
% NOTE(review): also unreachable here (num_cur~=0 branch); kept as-is.
if num_cur==0
feature_dependfirst=feature_depend;
end
if num_cur==m-1
feature_dependlast=feature_depend;
break;% termination: every feature has been selected
end
end
else
% first round: always accept the best single feature
feature_slct=[feature_slct feature_lft(max_feature)];
feature_lft(max_feature)=[];
feature_dependfirst=feature_depend;
end
stand=max_depend;
num_cur=num_cur+1;
end
%et=cputime-st
% profile viewer
% toc
function [Prior,PriorN,Cond,CondN]=MLKNN_train(train_data,train_target,Num,Smooth)
%MLKNN_train trains a multi-label k-nearest neighbor (ML-KNN) classifier.
%
% Syntax
%
%   [Prior,PriorN,Cond,CondN]=MLKNN_train(train_data,train_target,Num,Smooth)
%
% Description
%
%   train_data   - MxN array; the ith training instance is train_data(i,:)
%   train_target - QxM array; train_target(j,i) is +1 if instance i carries
%                  label j, otherwise -1
%   Num          - number of neighbors used by the k-nearest neighbor rule
%   Smooth       - Laplace smoothing parameter
% and returns,
%   Prior  - Qx1; Prior(i,1)  = P(Ci)
%   PriorN - Qx1; PriorN(i,1) = P(~Ci)
%   Cond   - Qx(Num+1); Cond(i,k+1)  = P(k | Ci):  probability that exactly k
%            of the Num neighbors of an instance in Ci carry label i
%   CondN  - Qx(Num+1); CondN(i,k+1) = P(k | ~Ci): same, for instances not in Ci
[num_class,num_training]=size(train_target);
%Computing distance between training instances; the diagonal is realmax so an
%instance is never selected as its own neighbor.
dist_matrix=diag(realmax*ones(1,num_training));
for i=1:num_training-1
    if(mod(i,100)==0)
        disp(strcat('computing distance for instance:',num2str(i)));
    end
    vector1=train_data(i,:);
    for j=i+1:num_training
        vector2=train_data(j,:);
        dist_matrix(i,j)=sqrt(sum((vector1-vector2).^2));
        dist_matrix(j,i)=dist_matrix(i,j);
    end
end
%Computing Prior and PriorN with Laplace smoothing
%FIX: preallocate outputs (the original grew Prior/PriorN/Cond/CondN
%element-by-element inside the loops).
Prior=zeros(num_class,1);
PriorN=zeros(num_class,1);
for i=1:num_class
    temp_Ci=sum(train_target(i,:)==ones(1,num_training));
    Prior(i,1)=(Smooth+temp_Ci)/(Smooth*2+num_training);
    PriorN(i,1)=1-Prior(i,1);
end
%Computing Cond and CondN
Neighbors=cell(num_training,1); %Neighbors{i,1} stores the Num neighbors of the ith training instance
for i=1:num_training
    [temp,index]=sort(dist_matrix(i,:));
    Neighbors{i,1}=index(1:Num);
end
temp_Ci=zeros(num_class,Num+1);  %temp_Ci(i,k+1):  # instances in Ci with k neighbors in Ci
temp_NCi=zeros(num_class,Num+1); %temp_NCi(i,k+1): # instances not in Ci with k neighbors in Ci
for i=1:num_training
    %labels of the Num neighbors, one column per neighbor
    %(direct indexing replaces the original one-column-at-a-time concatenation)
    neighbor_labels=train_target(:,Neighbors{i,1});
    temp=zeros(1,num_class); %temp(1,j): how many of the Num neighbors carry label j
    for j=1:num_class
        temp(1,j)=sum(neighbor_labels(j,:)==ones(1,Num));
    end
    for j=1:num_class
        if(train_target(j,i)==1)
            temp_Ci(j,temp(j)+1)=temp_Ci(j,temp(j)+1)+1;
        else
            temp_NCi(j,temp(j)+1)=temp_NCi(j,temp(j)+1)+1;
        end
    end
end
Cond=zeros(num_class,Num+1);
CondN=zeros(num_class,Num+1);
for i=1:num_class
    temp1=sum(temp_Ci(i,:));
    temp2=sum(temp_NCi(i,:));
    for j=1:Num+1
        Cond(i,j)=(Smooth+temp_Ci(i,j))/(Smooth*(Num+1)+temp1);
        CondN(i,j)=(Smooth+temp_NCi(i,j))/(Smooth*(Num+1)+temp2);
    end
end
function Average_Precision=MLKNN_test(train_data,train_target,test_data,test_target,Num,Prior,PriorN,Cond,CondN)
%MLKNN_test evaluates a trained ML-KNN classifier on a test set.
%
% Syntax
%
%   Average_Precision=MLKNN_test(train_data,train_target,test_data,test_target,Num,Prior,PriorN,Cond,CondN)
%
% Description
%
%   train_data   - M1xN array; the ith training instance is train_data(i,:)
%   train_target - QxM1 array; +1 if the instance carries the label, else -1
%   test_data    - M2xN array; the ith test instance is test_data(i,:)
%   test_target  - QxM2 array; +1/-1 ground-truth labels of the test set
%   Num          - number of neighbors used by the k-nearest neighbor rule
%   Prior,PriorN,Cond,CondN - the probabilities produced by MLKNN_train
% and returns,
%   Average_Precision - average precision of the posterior Outputs on the
%                       test set, as computed by Average_precision(...)
[num_class,num_training]=size(train_target);
[num_class,num_testing]=size(test_target);
%Distance from every test instance to every training instance
D=zeros(num_testing,num_training);
for t=1:num_testing
    if(mod(t,100)==0)
        disp(strcat('computing distance for instance:',num2str(t)));
    end
    q=test_data(t,:);
    for s=1:num_training
        D(t,s)=sqrt(sum((q-train_data(s,:)).^2));
    end
end
%The Num nearest training neighbors of each test instance
nbrs=cell(num_testing,1);
for t=1:num_testing
    [unused,order]=sort(D(t,:));
    nbrs{t,1}=order(1:Num);
end
%Posterior probability of each label for each test instance
Outputs=zeros(num_class,num_testing);
for t=1:num_testing
    %labels of the Num neighbors, one column per neighbor
    nl=train_target(:,nbrs{t,1});
    %votes(1,c): how many of the Num neighbors carry label c
    votes=zeros(1,num_class);
    for c=1:num_class
        votes(1,c)=sum(nl(c,:)==ones(1,Num));
    end
    for c=1:num_class
        p_yes=Prior(c)*Cond(c,votes(1,c)+1);
        p_no=PriorN(c)*CondN(c,votes(1,c)+1);
        if(p_yes+p_no==0)
            %degenerate case: fall back to the prior
            Outputs(c,t)=Prior(c);
        else
            Outputs(c,t)=p_yes/(p_yes+p_no);
        end
    end
end
%Hard +1/-1 label assignment at the 0.5 threshold (vectorized; same values
%as the original per-entry loop)
Pre_Labels=(Outputs>=0.5)*2-1;
Average_Precision=Average_precision(Outputs,test_target);