##################################################
### ST906 - Financial Time Series (2019/2020) ####
##################################################

### Introduction #################################
# Some useful references
# https://nwfsc-timeseries.github.io/atsa-labs/chap-ts.html
# https://cran.r-project.org/web/packages/zoo/vignettes/zoo-read.pdf
# https://www.stat.pitt.edu/stoffer/tsa4/   ## this is the main reference, with the book (TSA4)

# install.packages("pacman")  # run this only the first time; after that just load the library
library(pacman)
p_load(datasets, zoo, FinTS, astsa, TTR, xts, TSA, quantmod, ggplot2)  # TSA provides periodogram(), used below
Sys.setlocale("LC_ALL", "English")  # Windows locale name; use e.g. "en_US.UTF-8" on Linux/macOS

?nyse
nyse
str(nyse)
class(nyse)  # "ts" (Time-Series object)
plot(nyse)

data(AirPassengers)
AirPassengers
class(AirPassengers)  # ts: it is printed like a matrix, but stored as a vector
ts.plot(AirPassengers)
plot(log(AirPassengers))

data(sp500w)
?sp500w
head(sp500w)
plot(sp500w, lwd = 1)
class(sp500w)  # xts object, indexed by dates; its plot method gives a nice visualisation
y <- ts(sp500w, start = 2003, freq = 52)  # the same data coerced to a plain ts
ts.plot(y)
autoplot.zoo(sp500w)  # zoo's method for ggplot2::autoplot, for a nicer visualisation
# http://www.sthda.com/english/articles/32-r-graphics-essentials/128-plot-time-series-data-using-ggplot/

### Daily returns (or percent changes) of the Dow Jones Industrial Average
head(djia)
?astsa::djia
plot(djia$Close)
matplot(djia[, -5], type = "l", lty = 1, col = 1:5)  # plot all series except the 5th column (Volume)
legend("bottomright", legend = names(djia)[-5], col = 1:5, lty = 1)
quantmod::getSymbols("^DJI", src = "yahoo")  # now the object DJI should be in your environment
head(DJI)
plot(DJI$DJI.Close)
djia_returns <- diff(log(djia$Close))[-1]  # approximate daily returns via log-differences (cf. TSA4)
plot(djia_returns, main = "DJIA Returns", type = "n")  # type = "n" draws only the background of the plot
lines(djia_returns, col = 1)
plot(djia)

### White noise ##################################
# Gaussian iid white noise
wn <- rnorm(200)
wn <- ts(wn)
ts.plot(wn)
var(wn)
abline(h = mean(wn), col = "orange")
wn_filtered_3  = filter(wn, sides = 2, filter = rep(1/3, 3))    # moving average, window of width 3
wn_filtered_27 = filter(wn, sides = 2, filter = rep(1/27, 27))  # moving average, window of width 27
lines(wn_filtered_3, col = 2)
lines(wn_filtered_27, col = 3)
# The wider the filter window, the smoother the moving average.

### Random walk (from TSA4) ######################
set.seed(154)  # so you can reproduce the results
w  = rnorm(200)
x  = cumsum(w)   # random walk
wd = w + .2      # white noise plus drift
xd = cumsum(wd)  # random walk with drift
plot.ts(xd, ylim = c(-5, 55), main = "random walk", ylab = '')
lines(x, col = 4)
abline(h = 0, col = 4, lty = 2)
abline(a = 0, b = .2, lty = 2)

### Stationarity - ACF and PACF ##################
acf(wn)  # the autocorrelation function of a white noise is (approximately) zero at every lag after 0
acf_theory <- ARMAacf(ar = 0, ma = c(0.7, 0.2), lag.max = 20)
plot(0:20, acf_theory, type = "h")  # theoretical ACF of an MA(2), lags 0 to 20
# Tip: MA models are usually identified with the ACF (the last lag at which it is non-zero gives the order)
pacf_theory <- ARMAacf(ar = 0.9, ma = 0, lag.max = 20, pacf = TRUE)
plot(1:20, pacf_theory)  # theoretical PACF of an AR(1)
# Tip: AR models are usually identified with the PACF (the last lag at which it is non-zero gives the order)
sp500w_stat <- sp500w[seq_len(52 * 5)]  # select a "stationary-looking" chunk of data (first 5 years)
plot(sp500w_stat, main = "SP500W")
acf(sp500w_stat)  # what do you get from here?
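# A quick sanity check of what acf() reports, as a minimal sketch: the lag-1 sample
# autocorrelation computed by hand, and the approximate 95% bands (the dashed lines in
# the ACF plot) at +/- 1.96/sqrt(n). It assumes sp500w_stat has no missing values;
# rho_hat is just an illustrative helper name.
rho_hat <- function(x, k) {
  x <- as.numeric(x)
  n <- length(x)
  xbar <- mean(x)
  sum((x[(k + 1):n] - xbar) * (x[1:(n - k)] - xbar)) / sum((x - xbar)^2)
}
rho_hat(sp500w_stat, 1)                               # lag-1 sample autocorrelation, by hand
stats::acf(sp500w_stat, plot = FALSE)$acf[2]          # the same value as reported by stats::acf()
c(-1, 1) * qnorm(0.975) / sqrt(length(sp500w_stat))   # approximate 95% white-noise bands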
pacf(sp500w_stat)

### Backshift operator and difference operator (from TSA4) ####
?lag
acf_sp500w = round(acf(sp500w_stat, 10, plot = FALSE)$acf[-1], 3)  # first 10 sample ACF values (lag 0 dropped)
op <- par(mfrow = c(1, 2))
# scatterplots of the series against its own lagged values; the corresponding sample ACF value is shown in the legend
# (note: lag() uses opposite sign conventions for ts and xts objects)
plot(as.numeric(lag(sp500w_stat, -1)), as.numeric(sp500w_stat), xlab = "lag(sp500w, -1)", ylab = "sp500w")
legend('topleft', legend = acf_sp500w[1])
plot(as.numeric(lag(sp500w_stat, -6)), as.numeric(sp500w_stat), xlab = "lag(sp500w, -6)", ylab = "sp500w")
legend('topleft', legend = acf_sp500w[6])
par(op)  # restore the original plotting settings
?diff
diff(1:10, lag = 2, differences = 2)  # the difference operator: second differences at lag 2

### ARMA simulation ##############################
op <- par(mfrow = c(1, 2))  # split the plotting area into two boxes side by side
ts.sim <- arima.sim(list(order = c(1, 0, 0), ar = 0.7), n = 200)  # an AR(1); remember ARIMA(p,0,q) = ARMA(p,q)
acf(ts.sim)
pacf(ts.sim)
par(op)  # go back to the original settings

### Trend in time series #########################
trend_air <- lm(AirPassengers ~ time(AirPassengers))  # fit a linear model to extract the trend
summary(trend_air)  # print the coefficients and assess their significance
plot(AirPassengers)
abline(trend_air, col = 2)
time_air <- time(AirPassengers)
time_air_sq <- time(AirPassengers)^2
trend_air_quad <- lm(AirPassengers ~ time_air + time_air_sq)  # add a quadratic term
# you cannot use abline() here because the fitted trend is not a straight line
x_time <- seq(min(time(AirPassengers)), max(time(AirPassengers)), length.out = 1000)
y_trend_sq <- predict(trend_air_quad, list(time_air = x_time, time_air_sq = x_time^2))
lines(x_time, y_trend_sq, col = 3)
lines(lowess(AirPassengers, f = .05), lwd = 2, col = 4)  # lowess: nonparametric smoothing
lines(lowess(AirPassengers, f = .5), lwd = 2, col = 5)
# lowess has one smoothing parameter f: the smaller f is, the wigglier the curve, but the more closely it tracks the data
# this is an example of the bias-variance tradeoff

?arima
air_ar1 <- arima(AirPassengers, order = c(1, 0, 0))  # fit an ARIMA model, i.e. estimate the parameter values
air_ma1 <- arima(AirPassengers, order = c(0, 0, 1))
air_arma11 <- arima(AirPassengers, order = c(1, 0, 1))
AIC_air <- c(air_ar1$aic, air_ma1$aic, air_arma11$aic)  # model comparison: the smaller the AIC, the better the model
names(AIC_air) <- c("AR(1)", "MA(1)", "ARMA(1,1)")
AIC_air

# Useful transformations: differencing and taking logarithms
difflog_air = diff(log(AirPassengers))            # first difference of the log series
diff_difflog_air = diff(log(AirPassengers), 12)   # seasonal (lag-12) difference of the log series
plot.ts(cbind(AirPassengers, log(AirPassengers), difflog_air, diff_difflog_air), main = "")
monthplot(difflog_air)
monthplot(diff_difflog_air)

### Seasonality ##################################
set.seed(90210)
x = 2*cos(2*pi*1:500/50 + .6*pi) + rnorm(500, 0, 5)
z1 = cos(2*pi*1:500/50)
z2 = sin(2*pi*1:500/50)
summary(fit <- lm(x ~ 0 + z1 + z2))
plot.ts(x)
plot.ts(x, col = 8, ylab = expression(hat(x)))
lines(fitted(fit), col = 2)
periodogram(AirPassengers)  # frequency-domain analysis (from the TSA package)
plot(decompose(AirPassengers))
plot(stl(AirPassengers, "per"))
# stl() does the whole decomposition for you. Once trend and seasonality have been removed,
# you have to study the residual time series in terms of its ACF.
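# A minimal sketch of that residual check (the object names air_stl and air_remainder
# are just illustrative): extract the remainder component of the stl() decomposition
# and inspect its ACF.
air_stl <- stl(AirPassengers, s.window = "periodic")
air_remainder <- air_stl$time.series[, "remainder"]  # what is left after removing trend and seasonality
plot(air_remainder)
acf(air_remainder)  # ideally close to white noise; remaining significant lags suggest structure still to be modelled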