%% Data analysis
% Load the data set. Per the original notes, count.dat is a 24-by-3 array of
% hourly traffic counts (rows) at three intersections (columns). LOAD creates
% the workspace variable `count`.
load count.dat

%% Missing data at the third intersection
c3 = count(:,3);                 % data at intersection 3
c3NanCount = sum(isnan(c3))      % number of NaN samples (no semicolon: displayed on purpose)

%% Outliers
bin_counts = hist(c3);           % histogram bin counts
N = max(bin_counts);             % maximum bin count, used as the height of the marker lines

% mean/std return NaN as soon as one sample is NaN, which would make every
% outlier comparison below false. Compute the statistics on valid samples only.
c3_valid = c3(~isnan(c3));
mu3    = mean(c3_valid);         % data mean
sigma3 = std(c3_valid);          % data standard deviation

hist(c3);                                           % plot histogram
hold on
plot([mu3 mu3], [0 N], 'r', 'LineWidth', 2);        % mark the mean
plot([mu3+2*sigma3, mu3+2*sigma3], [0 N], 'g');     % mark mean + 2 std
hold off

% Replace samples lying more than two standard deviations ABOVE the mean
% with NaN (the test is one-sided, matching the single green marker).
outlier = (c3 - mu3) > 2*sigma3;
c3(outlier) = NaN;

%% Smoothing and filtering
% Time series of the third intersection. A new figure is opened so that the
% histogram above is not overwritten. NaN samples show up as gaps in the line.
figure
plot(c3, 'o-')
hold on

% Simple moving-average smoother.
span   = 3;                      % size of the averaging window
window = ones(span,1)/span;

% 'same' returns the central part of the convolution (same length as c3).
% Any window position that covers a NaN produces NaN in the output.
smoothed_c3 = convn(c3, window, 'same');
plot(smoothed_c3, 'ro-');

% FILTER applies the same window but returns the initial part of the
% convolution, so the result lags the 'same' convolution.
smoothed2_c3 = filter(window, 1, c3);
plot(smoothed2_c3, 'mo-');

% Legend is created after all three series exist so each one is labelled.
legend('Data', 'Smoothed Data', 'Filtered Data');
hold off

%--------------------------------------------------------
%% Summarizing data (column-wise, one value per intersection)
% Measures of location
x1 = mean(count)
x2 = median(count)
x3 = mode(count)

% Measures of scale
dx1 = max(count) - min(count)    % range
dx2 = std(count)
dx3 = var(count)

% Shape of the distribution
figure
hist(count)
legend('Intersection 1','Intersection 2','Intersection 3')

%% Modelling the distribution shape
% An exponential distribution is chosen, with parameter mu equal to the data mean.
c1 = count(:,1);                 % data at intersection 1
[bin_counts, bin_location] = hist(c1);
bin_width = bin_location(2) - bin_location(1);
hist_area = bin_width * sum(bin_counts);   % area under the histogram, used to scale the pdf

figure
hist(c1)
hold on                          % left on: the fitted curve is overlaid by the code that follows
mu1 = mean(c1)
exp_pdf = @(t) (1/mu1)*exp(-t/mu1);   % anonymous function, so no separate m-file is needed
t = 0:150;
y = exp_pdf(t);
% Overlay the exponential model on the intersection-1 histogram. The pdf is
% multiplied by the histogram area so both are on the same vertical scale.
plot(t, hist_area * y, 'r', 'LineWidth', 2);
legend('Distribution','Exponential fit')

%-----------------------------------------------------------------
%% Visualizing data
% 2-D scatter plot
c1 = count(:,1);                 % data at intersection 1
c2 = count(:,2);                 % data at intersection 2

figure
scatter(c1, c2, 'filled')
xlabel('Intersection 1');
ylabel('Intersection 2');

C12 = cov([c1 c2])               % covariance: strength of the linear relation between the two variables
R12 = corrcoef([c1 c2]);         % covariance standardized to correlation
r12 = R12(1,2)                   % correlation coefficient

% 3-D scatter plot: relationship between traffic volume at all three
% intersections. c3 is re-extracted from `count` here because the outlier
% step earlier in the script overwrote some c3 entries with NaN; plotting
% that copy would drop those hours while c1 and c2 are still raw.
c3 = count(:,3);                 % data at intersection 3
figure
scatter3(c1, c2, c3, 'filled')
xlabel('Intersection 1');
ylabel('Intersection 2');
zlabel('Intersection 3');
% The eigenvalues of the covariance matrix indicate the strength of the
% linear relationship among the variables in the 3-D scatter.

% Scatter plot array: compare the relationship between multiple pairs of
% intersections.
figure
plotmatrix(count)

% Exploring data in a graph: select the Data Cursor tool and click on a
% data point. A new figure is opened so the plot matrix is left intact.
figure
scatter(count(:,1), count(:,3))

%----------------------------------------------------------------
%% Modelling data
% Polynomial regression: polyfit estimates the coefficients, polyval
% evaluates the model at arbitrary values.
c3 = count(:,3);
num_samples = numel(c3);         % hour axis follows the data length instead of a hard-coded 24
tdata = (1:num_samples)';
p_coeffs = polyfit(tdata, c3, 6);

figure
plot(c3, 'o-');
hold on;
tfit = (1:0.01:num_samples)';    % dense grid for a smooth fitted curve
yfit = polyval(p_coeffs, tfit);
plot(tfit, yfit, 'r-', 'LineWidth', 2);
legend('Data','Polynomial fit','Location','NW');
hold off