% Lab 5 Master Script
%
%________________________________________________________________________
% Script:  Lab05.m
% Purpose: Linear Discriminant and Ridge Regression
% Author:  Tom Nichols
% Date:    5 Feb 2012
%

%%% Lab 4 wrap up... can we use feature reduction?

%
% Get digits
%
load digits.mat
nDigit = 10;
nPixel = size(digit_img,1);

% Define training and test cases
Itrain = 1:1000;
Itest  = 1001:2000;

[ccMean, ccVar] = train_digit_classifier(digit_img(:,Itrain),digit_lab(1,Itrain));

% Visualize the class conditionals: per-digit mean, SD, and zero-variance mask
colormap gray
for i=1:10
  subplot(3,10,i);    imagesc(reshape(ccMean(:,i),     [16 16]),[0 256]); axis image
  subplot(3,10,i+10); imagesc(reshape(sqrt(ccVar(:,i)),[16 16]),[0 150]); axis image
  subplot(3,10,i+20); imagesc(reshape(ccVar(:,i)==0,   [16 16]),[0 1]);   axis image
end

%
% Now try the different variance options; test accuracy for two training-set sizes:
%                                                                            nTrain=1000  nTrain=100
test_digit_classifier(ccMean,ccVar,digit_img(:,Itest),digit_lab(1,Itest),0)  %   0.7900     0.6300
test_digit_classifier(ccMean,ccVar,digit_img(:,Itest),digit_lab(1,Itest),1)  %   0.7910     0.6550
test_digit_classifier(ccMean,ccVar,digit_img(:,Itest),digit_lab(1,Itest),2)  %   0.7560     0.4090
test_digit_classifier(ccMean,ccVar,digit_img(:,Itest),digit_lab(1,Itest),3)  %   0.7590     0.4270
test_digit_classifier(ccMean,ccVar,digit_img(:,Itest),digit_lab(1,Itest),4)  %   0.7620     0.4520
test_digit_classifier(ccMean,ccVar,digit_img(:,Itest),digit_lab(1,Itest),5)  %   0.6860     0.1000
test_digit_classifier(ccMean,ccVar,digit_img(:,Itest),digit_lab(1,Itest),6)  %   0.7910     0.6550

%%% -> Conclusion - Pooling variance over all samples is best (Naive-Naive-Bayesian Classifier!)

%
% Subset selection
%

% ccMean is nPixel x nDigit; find the most important (most variable) pixels
[srt, Isrt] = sort(var(ccMean,0,2));
Isrt = flipud(Isrt);  % Need biggest variance first

% Now consider various subsets of the most variable pixels...
nBest=1;   test_digit_classifier(ccMean(Isrt(1:nBest),:),ccVar(Isrt(1:nBest),:),...
               digit_img(Isrt(1:nBest),Itest),digit_lab(1,Itest),1)  % 0.1000
nBest=2;   test_digit_classifier(ccMean(Isrt(1:nBest),:),ccVar(Isrt(1:nBest),:),...
               digit_img(Isrt(1:nBest),Itest),digit_lab(1,Itest),1)  % 0.2570
nBest=10;  test_digit_classifier(ccMean(Isrt(1:nBest),:),ccVar(Isrt(1:nBest),:),...
               digit_img(Isrt(1:nBest),Itest),digit_lab(1,Itest),1)  % 0.3600
nBest=20;  test_digit_classifier(ccMean(Isrt(1:nBest),:),ccVar(Isrt(1:nBest),:),...
               digit_img(Isrt(1:nBest),Itest),digit_lab(1,Itest),1)  % 0.5270
nBest=50;  test_digit_classifier(ccMean(Isrt(1:nBest),:),ccVar(Isrt(1:nBest),:),...
               digit_img(Isrt(1:nBest),Itest),digit_lab(1,Itest),1)  % 0.7060
nBest=100; test_digit_classifier(ccMean(Isrt(1:nBest),:),ccVar(Isrt(1:nBest),:),...
               digit_img(Isrt(1:nBest),Itest),digit_lab(1,Itest),1)  % 0.7570
nBest=200; test_digit_classifier(ccMean(Isrt(1:nBest),:),ccVar(Isrt(1:nBest),:),...
               digit_img(Isrt(1:nBest),Itest),digit_lab(1,Itest),1)  % 0.7890
nBest=256; test_digit_classifier(ccMean(Isrt(1:nBest),:),ccVar(Isrt(1:nBest),:),...
               digit_img(Isrt(1:nBest),Itest),digit_lab(1,Itest),1)  % 0.7910

% So, no: no simple selection of pixels out-performs using all pixels
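%
% For reference: train_digit_classifier / test_digit_classifier are supplied
% with the lab, and the variance-option codes 0-6 are defined there.  Below is
% a minimal sketch of the winning pooled-variance Gaussian rule, NOT the
% actual helper: it pools a single variance over all pixels and classes (one
% plausible reading of "pooling variance over all samples", which also
% sidesteps the zero-variance pixels shown in the bottom row of the plot
% above) and assumes equal class priors and that column k of ccMean is digit k-1.
pooledVar = mean(ccVar(:)) + eps;      % one variance for everything (assumed)
Xte   = digit_img(:,Itest);            % nPixel x nTest
nTest = numel(Itest);
logL  = zeros(nTest,nDigit);
for k = 1:nDigit                       % Gaussian log-likelihood for class k
  logL(:,k) = -sum(bsxfun(@minus,Xte,ccMean(:,k)).^2,1)'/(2*pooledVar);
end
[~,pred] = max(logL,[],2);             % pick the most likely digit
mean((pred-1)' == digit_lab(1,Itest))  % accuracy; compare with ~0.79 above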
%%% Lab 5 - Demonstration

% Ask for big fonts
set(0,'DefaultAxesFontSize',18)

% Get some 'mystery data'
[Y X]=GetDat(100,0.1);
% And an independent sample
[Ynew Xnew YnewTr]=GetDat(1000,0.1);
MSEtru = mean((Ynew(:) - YnewTr(:)).^2);

% Try to visualize it...
figure(1);scatter3(X(:,1),X(:,2),Y);xlabel('X_1');ylabel('X_2');zlabel('Y')
figure(2); plotmatrix([Y X])
% ... very hard to see any structure
figure(3)

%
% Let's try ridge regression
%

% Set up some values of lambda to try in ridge regression
llamb=[-10:3:10]';
lamb=exp(llamb);

Ylim=[0.1 1.1];
clf

% Linear only
[MSE,cvMSE]=RegMSE(X,Y,Xnew,Ynew,lamb);
subplot(2,3,1)
plot(llamb,[MSE,cvMSE]);ThickLine;legend('Train','Test','location','NorthWest')
set(gca,'ylim',Ylim)
title('X_1 + X_2')

% 1st order interaction
[MSE,cvMSE]=RegMSE([X prod(X,2)],Y,[Xnew prod(Xnew,2)],Ynew,lamb);
subplot(2,3,2)
plot(llamb,[MSE,cvMSE]);ThickLine;
set(gca,'ylim',Ylim)
title('X_1 + X_2 + X_1X_2')

% Quadratic
[MSE,cvMSE]=RegMSE([X X.^2],Y,[Xnew Xnew.^2],Ynew,lamb);
subplot(2,3,3)
plot(llamb,[MSE,cvMSE]);ThickLine;
set(gca,'ylim',Ylim)
title('X_1 + X_2 + X_1^2 + X_2^2')

% Cubic plus 1st order interaction
[MSE,cvMSE]=RegMSE([X X.^2 prod(X,2) X.^3],Y,[Xnew Xnew.^2 prod(Xnew,2) Xnew.^3],Ynew,lamb);
subplot(2,3,4)
plot(llamb,[MSE,cvMSE]);ThickLine
set(gca,'ylim',Ylim)
title('X_1 + X_2 + X_1^2 + X_2^2 + X_1X_2 + X_1^3 + X_2^3')

% Cubic plus higher order interactions
[MSE,cvMSE]=RegMSE([X X.^2 prod(X,2) X.^3 X(:,1).^2.*X(:,2) X(:,1).*X(:,2).^2],Y,...
    [Xnew Xnew.^2 prod(Xnew,2) Xnew.^3 Xnew(:,1).^2.*Xnew(:,2) Xnew(:,1).*Xnew(:,2).^2],Ynew,lamb);
subplot(2,3,5)
plot(llamb,[MSE,cvMSE]);ThickLine
set(gca,'ylim',Ylim)
title({'X_1 + X_2 + X_1^2 + X_2^2 + X_1X_2 + X_1^3',' + X_2^3 + X_1^2X_2 + X_1X_2^2'})

% Polynomial up to 8th order, plus 1st order interaction
[MSE,cvMSE]=RegMSE([X X.^2 prod(X,2) X.^3 X.^4 X.^5 X.^6 X.^7 X.^8 ],Y,...
    [Xnew Xnew.^2 prod(Xnew,2) Xnew.^3 Xnew.^4 Xnew.^5 Xnew.^6 Xnew.^7 Xnew.^8 ],Ynew,lamb);
subplot(2,3,6)
plot(llamb,[MSE,cvMSE]);ThickLine;
set(gca,'ylim',Ylim)
title({'X_1 + X_2 + X_1^2 + X_2^2 + ','... X_1^8 + X_2^8'})
abline('h',MSEtru)

%%% But all still pretty far from true MSE

% Can you see the light now?
[Y X]=GetDat(1000,0.5); figure(5);scatter3(X(:,1),X(:,2),Y)
[Y X]=GetDat(10000,0.5);figure(5);scatter3(X(:,1),X(:,2),Y)
[Y X]=GetDat(10000,0.0);figure(5);scatter3(X(:,1),X(:,2),Y)

%
% Generate and save training and test data
%

% Get some 'mystery data'
[Y X]=GetDat(100,0.1);
save Train.mat X Y
figure(5);scatter3(X(:,1),X(:,2),Y)

% And an independent sample
[Y X Ytrue]=GetDat(1000,0.1);
save Test.mat X Y Ytrue
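%
% For reference, RegMSE is supplied with the lab.  A minimal sketch of what
% such a helper might compute is below, named RegMSE_sketch so it does not
% shadow the real one (in newer MATLAB a local function may sit at the end of
% a script; otherwise save it to RegMSE_sketch.m).  It fits ridge regression
% at each lambda and returns training and held-out MSE; whether the real
% helper standardizes X, leaves the intercept unpenalized, or uses true
% cross-validation for its second output is an assumption here.
function [MSE, cvMSE] = RegMSE_sketch(X, Y, Xnew, Ynew, lamb)
  X1  = [ones(size(X,1),1)    X];       % add an intercept column
  Xn1 = [ones(size(Xnew,1),1) Xnew];
  P   = eye(size(X1,2));  P(1,1) = 0;   % don't penalize the intercept (assumed)
  MSE = zeros(numel(lamb),1);  cvMSE = MSE;
  for i = 1:numel(lamb)
    b        = (X1'*X1 + lamb(i)*P) \ (X1'*Y(:));  % ridge solution
    MSE(i)   = mean((Y(:)    - X1 *b).^2);         % training MSE
    cvMSE(i) = mean((Ynew(:) - Xn1*b).^2);         % held-out ("test") MSE
  end
end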