forked from paobranco/UBL
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathneighbours.f90
414 lines (345 loc) · 11.3 KB
/
neighbours.f90
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
!============================================================
!
! Neighbours evaluation for a selected distance metric
!
!============================================================
subroutine F_neighbours(tgtData, numData, nomData, p, k, n, nnum,&
nnom, Cl, distm, numD, res)
!----------------------------------------------------------------
! Subroutine neighbours is used for obtaining the nearest
! neighbours of a set of examples using a specified distance metric.
!----------------------------------------------------------------
! tgtData(1,n) :: input, real values, the target variable values
! numData(nnum,n) :: input, real values, the nnum numeric features
! values of the n cases.
! nomData(nnom,n) :: input, integer values, the nnom nominal features
! values (coded as integers) of the n cases.
! p :: input, integer, code for the distance metric:
! p=-1 -> Canberra
! p=-2 -> Overlap
! p=-3 -> HEOM
! p=-4 -> HVDM
! p=0 -> Chebyshev
! p=1 -> Manhattan or 1-norm
! p=2 -> Euclidean or 2-norm
! p>2 -> p-norm
! k :: input, integer, number of nearest neighbours
! n :: input, integer, number of examples in the data
! nnum :: input, integer, number of numeric attributes
! nnom :: input, integer, number of nominal attributes
! Cl :: input, integer, number of different classes in
! the target variable values
! distm(n,n) :: input/output, real values, distance matrix
! numD(nnum,n) :: input/output, real values, copy of numData
! res(k,n) :: input/output, integer,
!----------------------------------------------------------------
! Author:: P.Branco 2015.06.01 and 2016.04.10 -------------------
!----------------------------------------------------------------
! Dependence:: Inner subroutine callpnorm;
! Inner subroutine callChebyshev;
! Inner subroutine callHVDM;
! Inner subroutine callHEOM;
! Inner subroutine callCanberra;
! Inner subroutine callOverlap;
!----------------------------------------------------------------
! References:: for HVDM distance:
! Wilson, D.R. and Martinez, T.R., 1997.
! Improved heterogeneous distance functions.
! Journal of artificial intelligence research, pp.1-34.
!----------------------------------------------------------------
implicit none
!Arguments
integer (kind=4), intent(in) :: p, k, n, nnum, nnom, Cl
real (kind=8), intent(in) :: tgtData(1,n)
real (kind=8), intent(in) :: numData(nnum,n)
integer (kind=4), intent(in) :: nomData(nnom,n)
integer(kind=4), intent(inout) :: res(k,n)
real (kind=8), intent(inout) :: distm(n,n)
real (kind=8), intent(inout) :: numD(nnum,n)
!Local Variables
integer(kind=4) :: i,j,l,bestIndex
integer(kind=4) :: used(n)
real (kind=8) :: bestDist
real (kind=8) :: ranges(nnum,2)
real (kind=8) :: mean(nnum), sd(nnum)
distm=0.0d0
numD=numData
if (p >= 1) then ! p-norm
call callpNorm(p, distm, numD, nnum, n)
else if (p == 0) then
call callChebyshev(distm, numD, nnum, n)
else if (p == -1) then
call callCanberra(distm, numD, nnum, n)
else if (p == -2) then ! overlap metric: only for nominal attributes
call callOverlap(distm, nomData, nnom, n)
else if (p == -3) then
call callHEOM(distm, numD, nnum, nomData, nnom, n)
else if (p == -4) then
call callHVDM(distm, numD, nomData, nnum, nnom, tgtData, n, Cl)
! still to be implemented
! else if(p==-5) then
! call callDVDM(distm, numD, nomData, nnum, nnom, tgtData, n, Cl)
! else if(p==-6) then
! call callIVDM(distm, numD, nomData, nnum, nnom, tgtData, n, Cl)
! else if(p==-7) then
! call callWVDM(distm, numD, nomData, nnum, nnom, tgtData, n, Cl)
! else if(p==-8) then
! call callMVDM(distm, numD, nomData, nnum, nnom, tgtData, n, Cl)
end if
! calculate k nearest neighbours with dist matrix
do i=1,n
used = 0
used(i) = 1 ! the example is not a neighbour of himself
do j=1,k
bestDist = huge(bestDist)
bestIndex = -1
do l=1,n
if (distm(l,i) < bestDist .and. used(l) == 0) then
bestDist = distm(l,i)
bestIndex = l
end if
end do
used(bestIndex) = 1
res(j,i) = bestIndex
end do
end do
contains
subroutine callpNorm(p, distm, numD, nnum, n)
implicit none
!Arguments
integer (kind=4), intent(in) :: n, nnum, p
real (kind=8), intent(in) :: numD(nnum,n)
real (kind=8), intent(inout) :: distm(n,n)
!Local Variables
integer(kind=4) :: i,j
do j=1,n-1
do i=j+1,n
distm(i,j) = distm(i,j) + pNorm(p, numD(:,j), numD(:,i), nnum)
distm(j,i) = distm(i,j)
end do
end do
end subroutine callpNorm
subroutine callChebyshev(distm, numD, nnum, n)
implicit none
!Arguments
integer (kind=4), intent(in) :: n, nnum
real (kind=8), intent(in) :: numD(nnum,n)
real (kind=8), intent(inout) :: distm(n,n)
!Local Variables
integer(kind=4) :: i, j
do j=1,n-1
do i=j+1,n
distm(i,j) = distm(i,j) + chebyshev(numD(:,j), numD(:,i), nnum)
distm(j,i) = distm(i,j)
end do
end do
end subroutine callChebyshev
subroutine callHVDM(distm, numD, nomData, nnum, nnom, tgtData, n, Cl)
implicit none
!Arguments
integer(kind=4), intent(in) :: nnum, nnom, n, Cl
real(kind=8), intent(inout) :: distm(n,n)
real (kind=8), intent(in) :: numD(nnum,n)
integer(kind=4), intent(in) :: nomData(nnom,n)
real (kind=8), intent(in) :: tgtData(n)
!Local Variables
integer(kind=4) :: i,j
real (kind=8):: sd(nnum), mean(nnum)
sd = 0.0d0
mean = 0.0d0
if (nnum /= 0) then
do i=1,nnum
mean(i) = sum(numD(i,:))/n
sd(i) = sqrt(sum((numD(i,:)-mean(i))**2) / (n-1))
end do
end if
do j=1,n-1
do i=j,n
distm(i,j) = distm(i,j) + &
HVDM(numD(:,j), numD(:,i), sd, nomData(:,:),&
nnum, nnom, tgtData, n, i, j, Cl)
distm(j,i) = distm(i,j)
end do
end do
end subroutine callHVDM
subroutine callOverlap(distm, nomData, nnom, n)
implicit none
!Arguments
integer(kind=4), intent(in) :: n, nnom
integer(kind=4), intent(in) :: nomData(nnom,n)
real(kind=8), intent(inout) :: distm(n,n)
!Local Variables
integer :: i, j
do j=1,n-1
do i=j+1,n
distm(i,j) = distm(i,j) +&
overlap(nomData(:,j), nomData(:,i), nnom)
distm(j,i)=distm(i,j)
end do
end do
end subroutine callOverlap
subroutine callCanberra(distm, numD, nnum, n)
implicit none
!Arguments
integer(kind=4), intent(in) :: n, nnum
real (kind=8), intent(in) :: numD(nnum,n)
real(kind=8), intent(inout) :: distm(n,n)
!Local Variables
integer(kind=4) :: i, j
do j=1,n-1
do i=j+1,n
distm(i,j) = distm(i,j) + canberra(numD(:,j), numD(:,i), nnum)
distm(j,i) = distm(i,j)
end do
end do
end subroutine callCanberra
! subroutines still to be implemented
!subroutine callWVDM(distm)
!end subroutine callWVDM
!subroutine callIVDM(distm)
!end subroutine callIVDM
!subroutine callMVDM(distm)
!end subroutine callMVDM
subroutine callHEOM(distm, numD, nnum, nomData, nnom, n)
implicit none
!Arguments
integer(kind=4), intent(in) :: n, nnum, nnom
real (kind=8), intent(in) :: numD(nnum,n)
integer(kind=4), intent(in) :: nomData(nnom,n)
real(kind=8), intent(inout) :: distm(n,n)
!Local Variables
integer(kind=4) :: i, j
real (kind=8) :: ranges(nnum)
do i=1,nnum
ranges(i) = maxval(numD(i,:)) - minval(numD(i,:))
end do
do j=1,n-1
do i=j+1,n
distm(i,j) = distm(i,j) + sqrt(HEOMnum(numD(:,j), &
numD(:,i), nnum, ranges) + &
overlap(nomData(:,j), nomData(:,i), nnom))
distm(j,i) = distm(i,j)
end do
end do
end subroutine callHEOM
!!functions definitions
double precision function pNorm(p,a,b,d)
implicit none
integer(kind=4), intent(in) :: p, d
real (kind=8), intent(in) :: a(d), b(d)
pNorm = (sum((abs(a-b))**p))**(1.0/dble(p))
end function pNorm
double precision function HEOMnum(a,b,d,ranges)
implicit none
integer(kind=4), intent(in) :: d
real (kind=8), intent(in) :: a(d), b(d), ranges(d)
integer(kind=4) :: i
! Epsilon value
real (kind=8), PARAMETER :: eps = 1d-30
HEOMnum =0.0d0
do i=1, d
! HEOMnum = sum((abs(a-b)/ranges)**2)
if (ranges(i) > eps) then
HEOMnum = HEOMnum +(abs(a(i)-b(i))/ranges(i))**2
end if
end do
end function HEOMnum
double precision function chebyshev(a,b,d)
implicit none
integer(kind=4), intent(in) :: d
real (kind=8), intent(in) :: a(d), b(d)
chebyshev = maxval(abs(a-b))
end function chebyshev
double precision function overlap(a,b,d)
implicit none
integer(kind=4), intent(in) :: d
integer(kind=4), intent(in) :: a(d), b(d)
integer (kind=4) :: i,s
s = 0
do i=1,d
if(a(i) /= b(i))then
s = s + 1
end if
end do
overlap = s
end function overlap
double precision function canberra(a,b,d)
implicit none
integer(kind=4), intent(in) :: d
real (kind=8), intent(in) :: a(d), b(d)
canberra = sum(abs(a-b)/(abs(a)+abs(b)))
end function canberra
! Heterogenous Value Difference Metric
double precision function HVDM(numa, numb, sd, nom, dimnum, dimnom, tgtData, n, i, j, Cl)
implicit none
integer(kind=4), intent(in) :: dimnum, dimnom, n, i, j, Cl
real (kind=8), intent(in) :: numa(dimnum), numb(dimnum)
integer(kind=4), intent(in) :: nom(dimnom, n)
real (kind=8), intent(in) :: tgtData(n)
real (kind=8), intent(in) :: sd(dimnum)
real (kind=8) :: resnum, resnom
if (dimnom == 0) then
resnum = HVDMnum(numa, numb, dimnum, sd)
resnom = 0.0d0
else if (dimnum == 0) then
resnum = 0.0d0
resnom = HVDMnom(nom, dimnom, tgtData, n, i, j, Cl)
else
resnum = HVDMnum(numa, numb, dimnum, sd)
resnom = HVDMnom(nom, dimnom, tgtData, n, i, j, Cl)
end if
HVDM = sqrt(resnum + resnom)
end function HVDM
double precision function HVDMnum(numa, numb, dimnum, sd)
implicit none
integer(kind=4), intent(in) :: dimnum
real (kind=8), intent(in) :: numa(dimnum), numb(dimnum), sd(dimnum)
integer (kind=4) :: i
! Epsilon value
real (kind=8), PARAMETER :: eps = 1d-30
HVDMnum = 0.0d0
do i=1, dimnum
if (sd(i) > eps) then
HVDMnum = HVDMnum + ((abs(numa(i)-numb(i))/(4*sd(i)))**2)
end if
end do
end function HVDMnum
double precision function HVDMnom(nomdata, dimnom, tgtData, n, i, j, Cl)
implicit none
integer(kind=4), intent(in) :: dimnom, n, i, j, Cl
integer(kind=4), intent(in) :: nomdata(dimnom, n)
real (kind=8), intent(in) :: tgtData(n)
real (kind=8) :: res
integer (kind=4) :: att, k, l, ci, cj, p, NCli, NClj
real (kind=8) :: resAt, finalresAt
finalresAt = 0
do att=1,dimnom
resAt = 0
ci = 0
cj = 0
do p=1,n
if (nomdata(att,p)==nomdata(att,i)) then
ci = ci + 1
end if
if (nomdata(att,p)==nomdata(att,j)) then
cj = cj + 1
end if
end do
do k=1,Cl
NCli = 0
NClj = 0
do l=1,n
if (nomdata(att,l)==nomdata(att,i) .and. tgtData(l)==tgtData(i)) then
NCli = NCli + 1
end if
if (nomdata(att,l)==nomdata(att,j) .and. tgtData(l)==tgtData(j)) then
NClj = NClj + 1
end if
end do
resAt = resAt + (abs((NCli/ci) - (NClj/cj)))**2
end do
finalresAt = finalresAt + resAt
end do
HVDMnom = finalresAt
end function HVDMnom
end subroutine F_neighbours