Skip to content

Commit

Permalink
Adding initial files
Browse files Browse the repository at this point in the history
  • Loading branch information
Ritam-Guha authored Jul 14, 2020
1 parent 1e72d3b commit 9aa3c99
Show file tree
Hide file tree
Showing 11 changed files with 773 additions and 0 deletions.
152 changes: 152 additions & 0 deletions WOA.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
function []=WOA(str,chaos,k)
% WOA  Whale Optimization Algorithm wrapper for feature selection.
%
% Inputs:
%   str   - dataset name; results are saved under Results/<str>/
%   chaos - chaotic map id forwarded to chaotic() (1=circle, 2=logistic, ...)
%   k     - number of neighbours for the k-NN fitness evaluation
%
% Uses globals: x (samples-by-features data matrix), memory (result store),
%               iteration (number of iterations), populationSize.
rng('shuffle');
global x memory;
global iteration populationSize ;
createDivision();
totalPop = populationSize;
[~,c]=size(x);   % c = total number of features

%% generating initial population
population=datacreate(populationSize,c);
p=0.3;                 % chaotic-map state driving the encircle/spiral choice
decrement_ratio=0.8;   % population shrink factor applied each iteration
fitness = zeros(1,populationSize);

%% calculating fitness value of each solution
for i=1:populationSize
    [fitness(i,1)]=crossValidation(population(i,:),k);
end
[best,id]=sort(fitness,'descend');

%% assigning fittest solution as prey
prey=population(id(1,1),:);
preyacc=best(1,1);
init_best_acc=preyacc;
init_best_feat=prey;
% BUG FIX: the selected-feature percentage was multiplied by 100 twice.
fprintf('INITAL PERCENTAGE OF FEATURE SELECTED = %f\n',sum(prey)*100/c);
fprintf('INITIAL BEST ACCURACY TILL NOW = %f\n',preyacc*100);

%% for each iteration
tic
updatedPopulation = zeros(populationSize,c);
new_fitness = zeros(populationSize,1);
for q=1:iteration
    fprintf('\n');
    fprintf('========================================\n');
    fprintf(' Iteration - %d\n',q);
    fprintf('========================================\n\n');

    location = strcat('Results/',str,'/');
    folderName = strcat(location,'WOA_',int2str(chaos),'_Pop_',int2str(totalPop),'_Iter_',int2str(iteration),'_KNN_',int2str(k));
    % Make sure the output folder exists before save() is called below.
    if ~exist(folderName,'dir')
        mkdir(folderName);
    end
    for i=1:populationSize
        p=chaotic(p,chaos);
        if p<0.5
            a=2-q*(2/iteration);   % 'a' decays linearly from 2 to 0
            r=rand(1,c);
            C=2*r;
            A=calculate(a,r,c);
            if modulas(A,c)<1
                % exploitation: encircle the current prey
                D=dista(C,population(i,:),prey,c);
                updatedPopulation(i,:)=encircle(prey,c,A,D);
            else
                % exploration: move towards a random search agent
                sagent=round((populationSize-1)*(rand()))+1;
                D=dista(C,population(i,:),population(sagent,:),c);
                updatedPopulation(i,:)=encircle(population(sagent,:),c,A,D);
            end
        else
            % spiral position update around the prey
            updatedPopulation(i,:)=spiral(population(i,:),prey,c);
        end
    end
    fprintf('\n================local search starts=================\n');
    [updatedPopulation]=local_search(updatedPopulation,populationSize,str);
    fprintf('\n================local search ends===================\n');

    for i=1:populationSize
        [new_fitness(i,1)]=crossValidation(updatedPopulation(i,:),k);
    end
    [nbest,nid]=sort(new_fitness,'descend');
    new_fitness=new_fitness(nid,:);
    updatedPopulation = updatedPopulation(nid,:);
    if nbest(1,1) > preyacc
        % BUG FIX: updatedPopulation has already been re-sorted above, so
        % the fittest solution is row 1, not row nid(1,1).
        prey=updatedPopulation(1,:);
        preyacc=nbest(1,1);
    end

    % Shrink the population, keeping the fittest survivors (never below 15).
    populationSize=max(15,int16(decrement_ratio*populationSize));
    population(1:populationSize,:)=updatedPopulation(1:populationSize,:);
    % BUG FIX: the original assigned a single scalar,
    % new_fitness(populationSize,1), to the whole surviving range.
    fitness(1:populationSize,1)=new_fitness(1:populationSize,1);
    fprintf('BEST ACCURACY TILL CURRENT ITERATION-%f\n',preyacc*100);
    save(strcat(folderName,'/Iteration_',int2str(q),'.mat'),'prey','preyacc');
end

fprintf('\n\n---------------------BEST RESULT------------------\n');
fprintf('INITIAL BEST ACCURACY:%f\n',init_best_acc*100);
fprintf('PERCENTAGE OF INITIAL FEATURE SELECTED=%f\n',sum(init_best_feat)*100/c);
fprintf('BEST ACCURACY:%f\n',preyacc*100);
fprintf('PERCENTAGE OF FEATURE SELECTED=%f\n',sum(prey)*100/c);
fprintf('--------------------------------------------------\n');
time=toc;
% Persist the run in the global memory struct: row 1 holds the best
% solution; rows 2..end hold the final population and its fitness.
memory(1,:).accuracy = preyacc;
memory(1,:).features = prey;
memory.accuracy(2:(1+size(new_fitness,1)),:) = new_fitness;
memory.features(2:(1+size(new_fitness,1)),:) = updatedPopulation;
memory.time=time;
end
function [P]=calculate(a,r,c)
% calculate  WOA coefficient vector A = 2*a.*r - a, elementwise.
%
% Inputs:
%   a - scalar control parameter (decays from 2 to 0 over the run)
%   r - 1-by-(at least c) vector of uniform random numbers
%   c - number of coefficients to produce
% Output:
%   P - 1-by-c coefficient vector
%
% Vectorized replacement for the original un-preallocated element loop.
P = 2*a*r(1,1:c) - a;
end
function [m]=modulas(A,c)
% modulas  Euclidean (L2) norm of the first c entries of row vector A.
%
% Inputs:
%   A - 1-by-(at least c) row vector
%   c - number of entries to include
% Output:
%   m - sqrt(sum of squares), i.e. norm(A(1,1:c))
%
% Uses the built-in norm instead of the hand-rolled accumulation loop.
m = norm(A(1,1:c));
end
function [d]=dista(C,X,Xp,c)
% dista  WOA distance vector D = |C .* Xp - X|, elementwise.
%
% Inputs:
%   C  - 1-by-c coefficient vector
%   X  - current search agent position (1-by-c)
%   Xp - reference position, e.g. the prey (1-by-c)
%   c  - number of dimensions
% Output:
%   d  - 1-by-c absolute distance vector
%
% Vectorized replacement for the original un-preallocated element loop.
d = abs(C(1,1:c).*Xp(1,1:c) - X(1,1:c));
end
%% division of crossvalidation
function [] = createDivision()
% createDivision  Assign every sample to a cross-validation fold, stratified
% by class so each fold receives roughly the same class distribution.
%
% Uses globals: t         - one-hot target matrix (samples x classes)
%               fold      - number of folds
%               selection - OUTPUT: fold id (1..fold) for each sample
global t fold selection;
rows = size(t,1);
s=size(t,1);
% Decode one-hot target rows into integer class labels.
labels=zeros(1,s);
for i=1:s
labels(1,i)=find(t(i,:),1);
end
l = max(labels);   % number of classes
selection=zeros(1,rows);   % 0 = sample not yet assigned to a fold
% disp(size(selection));
for k=1:l
% Number of class-k samples and how many of them each fold receives.
count1=sum(labels(:)==k);
samplesPerFold=int16(floor((count1/fold)));
% Fill each fold with samplesPerFold unassigned class-k samples.
for j=1:fold
count=0;
for i=1:rows
if(labels(i)==k && selection(i)==0)
selection(i)=j;
count=count+1;
end
if(count==samplesPerFold)
break;
end
end
end
% Distribute any leftover class-k samples one per fold, starting at fold 1.
j=1;
for i=1:rows
if(selection(i)==0 && labels(i)==k)
selection(i)=j;%sorts any extra into rest
j=j+1;
end
end
end
end
45 changes: 45 additions & 0 deletions chaotic.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
function [p]=chaotic(p,type)
% chaotic  Advance state p by one step of the selected chaotic map.
%
% Inputs:
%   p    - current map state
%   type - map selector: 1=circle, 2=logistic, 3=piecewise, 4=tent
%          (any other value leaves p unchanged)
% Output:
%   p    - next map state
switch type
    case 1
        p = circle(p);
    case 2
        p = logistics(p);
    case 3
        p = piecewise(p);
    case 4
        p = tent(p);
end
end

function [p]=circle(p)
% circle  One iteration of the circle chaotic map, wrapped into [0,1).
%
% Input/Output:
%   p - map state
a=0.5;
b=0.2;
% BUG FIX: the original shadowed the built-in pi with the truncated
% constant 3.1415, making sin(2*pi*p) inexact at the map's fixed points.
p=mod(p+b-(a/(2*pi))*sin(2*pi*p),1);
end

function [p]=logistics(p)
% logistics  One iteration of the logistic map x <- r*x*(1-x) at full
% chaos (growth rate r = 4).
growthRate = 4;
p = growthRate .* p .* (1 - p);
end

function [p]=piecewise(p)
% piecewise  One iteration of the piecewise linear chaotic map with
% control parameter a = 0.4. Values outside [0,1) pass through unchanged.
a = 0.4;
if p < 0 || p >= 1
    return;   % out-of-range state is left untouched, as in the original
end
if p < a
    p = p / a;
elseif p < 0.5
    p = (p - a) / (0.5 - a);
elseif p < 1 - a
    p = (1 - a - p) / (0.5 - a);
else
    p = (1 - p) / a;
end
end

function [p]=tent(p)
% tent  One iteration of the (asymmetric) tent chaotic map with its
% break point at 0.7.
breakPoint = 0.7;
if p < breakPoint
    p = p / breakPoint;
else
    p = (10 * (1 - p)) / 3;
end
end
157 changes: 157 additions & 0 deletions crossValidation.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
function [per]=crossValidation(chromosome,k)
% crossValidation  Mean k-fold cross-validated accuracy of a feature subset.
%
% Inputs:
%   chromosome - binary row vector; 1 marks a selected feature column of x
%   k          - classifier hyper-parameter (neighbours for k-NN)
% Output:
%   per        - mean accuracy over the folds (0 when no feature selected)
%
% Uses globals: x (samples x features data), t (one-hot targets),
%               fold (number of folds), selection (fold id per sample,
%               filled by createDivision).
global x t fold selection;
rng('default');   % NOTE(review): resets the RNG on every call - confirm intended

rows=size(x,1);
cols=size(x,2);
accuracy=zeros(1,fold);
data=x(:,chromosome==1);
% An empty feature subset cannot be classified - score it zero.
if (size(data,2)==0)
    per = 0;
    return;
end
for i=1:fold
    chr=zeros(rows,1);   % per-sample split flags: 0 = training, 1 = test
    for j=1:rows
        if selection(j)==i
            chr(j,1)=1;
        end
    end
    ch = 1;   % classifier choice: 1 = k-NN, 2 = SVM, 3 = MLP
    if (ch==1)
        accuracy(1,i) = knnClassify(data,t,chr,k);
    elseif (ch==2)
        accuracy(1,i) = svmClassify(data,t,chr,k);
    else
        accuracy(1,i) = mlpClassify(data,t,chr,k);
    end
end
per = mean(accuracy);
% BUG FIX: the feature ratio is fractional, so it must be printed with %f;
% fprintf falls back to exponential notation when %d gets a non-integer.
fprintf('Features - %10f Accuracy - %10f\n',(size(data,2)/cols)*100,per*100);
end
function [performance]=knnClassify(x,t,chr,k)
% knnClassify  Train a k-NN classifier on the training split and return
% its accuracy on the held-out split.
%
% Inputs:
%   x   - data matrix restricted to the selected features
%   t   - one-hot target matrix (samples x classes)
%   chr - per-sample split flags: 0 = training sample, 1 = test sample
%   k   - number of neighbours
% Output:
%   performance - fraction of test samples classified correctly
testX  = x(chr(:)==1,:);
testT  = t(chr(:)==1,:);
trainX = x(chr(:)==0,:);
trainT = t(chr(:)==0,:);

% Decode one-hot training targets into integer class labels.
nTrain = size(trainT,1);
trainLabels = zeros(1,nTrain);
for r=1:nTrain
    trainLabels(1,r) = find(trainT(r,:),1);
end

knnModel = fitcknn(trainX,trainLabels,'NumNeighbors',k,'Standardize',1);
[predicted,~] = predict(knnModel,testX);

% Decode one-hot test targets the same way.
nTest = size(testT,1);
trueLabels = zeros(nTest,1);
for r=1:nTest
    trueLabels(r,1) = find(testT(r,:),1);
end

errRate = sum(trueLabels ~= predicted)/nTest;   % mis-classification rate
performance = 1 - errRate;
end

function [performance]=mlpClassify(x,t,chr,k)
% mlpClassify  Train a pattern-recognition network and return its accuracy
% on the held-out split.
%
% Inputs:
%   x   - data matrix restricted to the selected features
%   t   - target matrix (samples x classes); transposed for patternnet below
%   chr - per-sample split flags: 0 = training sample, 1 = test sample
%   k   - reused here as the hidden layer size
% Output:
%   performance - fraction of test samples classified correctly (1 - c)
h = k;   % hidden layer size

target=t(chr(:)==0,:);   % training targets

input=x(chr(:)==0,:);    % training inputs
%fprintf('Train set created\n');

% patternnet expects observations in columns, hence the transposes.
inputs = input';
targets = target';

hiddenLayerSize = h;

net = patternnet(hiddenLayerSize);
net.trainParam.showWindow = 0;   % suppress the training GUI window

% Setup Division of Data for Training, Validation, Testing
% (the held-out fold is evaluated separately below, so testRatio is 0)
net.divideParam.trainRatio =85/100;
net.divideParam.valRatio = 15/100;
net.divideParam.testRatio = 0/100;

% Train the Network
[net, ] = train(net,inputs,targets);

clear targets target inputs input;
% Test the Network on the held-out split (chr == 1)
target=t(chr(:)==1,:);
input=x(chr(:)==1,:);
inputs=input';targets=target';
outputs = net(inputs);

% confusion() returns the mis-classification rate c.
[c, ] = confusion(targets,outputs);
performance=1-c;%how much accuracy we get
end

function [performance]=svmClassify(x,t,chr,k)
% svmClassify  Train an SVM (binary, or one-vs-all ECOC for multiclass)
% and return its accuracy on the held-out split.
%
% Inputs:
%   x   - data matrix restricted to the selected features
%   t   - one-hot target matrix (samples x classes)
%   chr - per-sample split flags: 0 = training sample, 1 = test sample
%   k   - unused here (kept so the signature matches the other classifiers)
% Output:
%   performance - fraction of test samples classified correctly
h = 'linear';   % kernel function
x2=x(chr(:)==1,:);   % test inputs
t2=t(chr(:)==1,:);   % test targets
x=x(chr(:)==0,:);    % training inputs
t=t(chr(:)==0,:);    % training targets

% Decode one-hot training targets into integer class labels.
s=size(t,1);
label=zeros(1,s);
for i=1:s
    label(1,i)=find(t(i,:),1);
end

if max(label)==2
    % Binary problem: a single SVM suffices.
    svmModel=fitcsvm(x,label,'KernelFunction',h,'Standardize',true,'ClassNames',[1 2]);
else
    % Multiclass: one-vs-all ECOC ensemble of linear SVM learners.
    % (classList replaces a loop-built variable that shadowed built-in class.)
    classList=1:max(label);
    temp = templateSVM('Standardize',1,'KernelFunction',h);
    svmModel = fitcecoc(x,label,'Learners',temp,'FitPosterior',1,'ClassNames',classList,'Coding','onevsall');
end
[label,~] = predict(svmModel,x2);

% BUG FIX: the original set lab = t2', comparing the raw one-hot target
% matrix against the predicted label vector. Decode the test targets into
% class indices first, exactly as knnClassify does.
s=size(t2,1);
lab=zeros(s,1);
for i=1:s
    lab(i,1)=find(t2(i,:),1);
end

c = sum(lab ~= label)/s;   % mis-classification rate
performance=1-c;
end
27 changes: 27 additions & 0 deletions datacreate.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
% Creates an initial binary population: each row is a random feature mask.
function [data] = datacreate(n,num)
% datacreate  Build n random binary chromosomes of length num.
%
% Inputs:
%   n   - number of chromosomes (population size)
%   num - number of features (chromosome length)
% Output:
%   data - n-by-num logical matrix; true marks a selected feature

rng('shuffle');
% Renamed from max/min: the original shadowed the built-in functions.
maxFeat=int16(num*0.50);   % baseline number of selected features
minFeat=int16(num*0.40);   % extra selections are drawn from [0, minFeat]
% NOTE(review): comparing feature counts against the population size n
% looks suspicious (num seems more likely intended) - kept as-is to
% preserve the original behavior.
if(maxFeat+minFeat>n)
    maxFeat=maxFeat-1;
    minFeat=minFeat-1;
end
data=int16(zeros(n,num));
for i=1:n
    % Each chromosome turns on between maxFeat and maxFeat+minFeat features.
    x2=int16(abs(rand*(minFeat)))+(maxFeat);
    % Random permutation of feature indices; the first x2 are selected.
    temp=rand(1,num);
    [~,temp]=sort(temp);
    for j= 1:x2
        data(i,temp(j))=1;
    end
end
data = data > 0.5;   % convert the int16 mask to logical
end
Loading

0 comments on commit 9aa3c99

Please sign in to comment.