# this command enters the data and turns it into a DATA FRAME with headers diffs <- read.table("us_tax_onelag.csv", sep=",", header = TRUE) # runs a ridge regression and names the variables and computes the X matrix and the Y vector ridge01 <- RXridge(dln_cv_ln_net~dln_mn_ln_net+ # ridge01 <- RXridge(dln_cv_ln_gross~dln_mn_ln_gross+ dln_mn_hcap+dln_cv_hcap+dln_wages+dln_int_rec+dln_div_rec+dln_lt_gain+dln_mortgage+dln_pension_rec, data=diffs, rscale=0, Q=0) ridge02 <- RXridge(dln_cv_ln_net~dln_mn_ln_net+ # ridge02 <- RXridge(dln_cv_ln_gross~dln_mn_ln_gross+ dln_mn_hcap+dln_cv_hcap+dln_wages+dln_int_rec+dln_div_rec+dln_lt_gain+dln_mortgage+dln_pension_rec, data=diffs, rscale=0, Q="qmse") yvarname <- c("dln_cv_ln_net") yvec <- diffs$dln_cv_ln_net-mean(diffs$dln_cv_ln_net) # yvarname <- c("dln_cv_ln_gross") # yvec <- diffs$dln_cv_ln_gross-mean(diffs$dln_cv_ln_gross) expvarnames <- c("dln_mn_ln_net", # expvarnames <- c("dln_mn_ln_gross", "dln_mn_hcap","dln_cv_hcap","dln_wages","dln_int_rec","dln_div_rec","dln_lt_gain","dln_mortgage","dln_pension_rec") # how many explanatory variables? how many observations? nuexpvar <- ridge01$p nuobs <- ridge01$n # CHANGE THE X-MATRIX !!! xmat <- matrix(c( diffs$dln_mn_ln_net-mean(diffs$dln_mn_ln_net), # diffs$dln_mn_ln_gross-mean(diffs$dln_mn_ln_gross), diffs$dln_mn_hcap-mean(diffs$dln_mn_hcap), diffs$dln_cv_hcap-mean(diffs$dln_cv_hcap), diffs$dln_wages-mean(diffs$dln_wages), diffs$dln_int_rec-mean(diffs$dln_int_rec), diffs$dln_div_rec-mean(diffs$dln_div_rec), diffs$dln_lt_gain-mean(diffs$dln_lt_gain), diffs$dln_mortgage-mean(diffs$dln_mortgage), diffs$dln_pension_rec-mean(diffs$dln_pension_rec)),nrow=nuobs,ncol=nuexpvar) colnames(xmat) <- expvarnames # # # # # # # xtx <- t(xmat)%*%xmat xty <- t(xmat)%*%yvec lamval01 <- c(rep(NA,times=nuexpvar)) lammat01 <- matrix(c(0),nrow=nuexpvar,ncol=nuexpvar) for (i in 1:nuexpvar) {lamval01[i] <- ridge01$prinstat[i,1]} for (i in 1:nuexpvar) {lammat01[i,i] <- lamval01[i]^ridge01$qp} lamval02 <- c(rep(NA,times=nuexpvar)) lammat02 <- matrix(c(0),nrow=nuexpvar,ncol=nuexpvar) for (i in 1:nuexpvar) {lamval02[i] <- ridge02$prinstat[i,1]} for (i in 1:nuexpvar) {lammat02[i,i] <- lamval02[i]^ridge02$qp} s <- svd(xmat) glg01 <- s$v%*%lammat01%*%t(s$v) glg02 <- s$v%*%lammat02%*%t(s$v) # find optimal k value mlikno01 <- c(1:length(ridge01$mlik[,2])) wheremin01 <- c(rep(NA,times=length(ridge01$mlik[,2]))) choosek01 <- matrix(nrow=length(ridge01$mlik[,2]),ncol=5) colnames(choosek01) <- c("obsno","M","K","CLIK","minat") for (i in 1:length(ridge01$mlik[,2])) {choosek01[i,1] = mlikno01[i]} for (i in 1:length(ridge01$mlik[,2])) {choosek01[i,2] = ridge01$mlik[i,1]} for (i in 1:length(ridge01$mlik[,2])) {choosek01[i,3] = ridge01$mlik[i,2]} for (i in 1:length(ridge01$mlik[,2])) {choosek01[i,4] = ridge01$mlik[i,3]} for (i in 1:length(ridge01$mlik[,2])) {wheremin01[i] = ifelse(ridge01$mlik[i,3]!=min(ridge01$mlik[,3]),length(ridge01$mlik[,2])+1,mlikno01[i])} for (i in 1:length(ridge01$mlik[,2])) {choosek01[i,5] = min(wheremin01[])} mlikno02 <- c(1:length(ridge02$mlik[,2])) wheremin02 <- c(rep(NA,times=length(ridge02$mlik[,2]))) choosek02 <- matrix(nrow=length(ridge02$mlik[,2]),ncol=5) colnames(choosek02) <- c("obsno","M","K","CLIK","minat") for (i in 1:length(ridge02$mlik[,2])) {choosek02[i,1] = mlikno02[i]} for (i in 1:length(ridge02$mlik[,2])) {choosek02[i,2] = ridge02$mlik[i,1]} for (i in 1:length(ridge02$mlik[,2])) {choosek02[i,3] = ridge02$mlik[i,2]} for (i in 1:length(ridge02$mlik[,2])) {choosek02[i,4] = ridge02$mlik[i,3]} for (i in 1:length(ridge02$mlik[,2])) {wheremin02[i] = ifelse(ridge02$mlik[i,3]!=min(ridge02$mlik[,3]),length(ridge02$mlik[,2])+1,mlikno02[i])} for (i in 1:length(ridge02$mlik[,2])) {choosek02[i,5] = min(wheremin02[])} # here's optimal k chosenk01 <- choosek01[min(wheremin01[]),3] chosenk02 <- choosek02[min(wheremin02[]),3] # get beta and covariance matrix betar01 <- ridge01$coef[min(wheremin01[]),] varr01 <-(1/(nuobs-nuexpvar))*(t(yvec-xmat%*%betar01)%*%(yvec-xmat%*%betar01)) covr01 <- varr01[1,1]*solve(xtx + chosenk01*glg01)%*%xtx%*%solve(xtx + chosenk01*glg01) betar02 <- ridge02$coef[min(wheremin02[]),] varr02 <-(1/(nuobs-nuexpvar))*(t(yvec-xmat%*%betar02)%*%(yvec-xmat%*%betar02)) covr02 <- varr02[1,1]*solve(xtx + chosenk02*glg02)%*%xtx%*%solve(xtx + chosenk02*glg02) # calculate RSS, TSS, R-squared and F-stat rssr01 <- t(yvec-xmat%*%betar01)%*%(yvec-xmat%*%betar01) tssr01 <- t(yvec)%*%yvec r2r01 <- 1-(rssr01[1,1]/tssr01[1,1]) fstatr01 <- (r2r01/(1-r2r01))*((nuobs-nuexpvar)/(nuexpvar-1)) adjr2r01 <- 1-((1-r2r01)*((nuobs-1)/(nuobs-nuexpvar))) rssr02 <- t(yvec-xmat%*%betar02)%*%(yvec-xmat%*%betar02) tssr02 <- t(yvec)%*%yvec r2r02 <- 1-(rssr02[1,1]/tssr02[1,1]) fstatr02 <- (r2r02/(1-r2r02))*((nuobs-nuexpvar)/(nuexpvar-1)) adjr2r02 <- 1-((1-r2r02)*((nuobs-1)/(nuobs-nuexpvar))) # set up coefficient table betarse01 <- c(rep(NA,times=nuexpvar)) betarts01 <- c(rep(NA,times=nuexpvar)) for (i in 1:nuexpvar) {betarse01[i] = sqrt(covr01[i,i])} for (i in 1:nuexpvar) {betarts01[i] = betar01[i]/betarse01[i]} betarse02 <- c(rep(NA,times=nuexpvar)) betarts02 <- c(rep(NA,times=nuexpvar)) for (i in 1:nuexpvar) {betarse02[i] = sqrt(covr02[i,i])} for (i in 1:nuexpvar) {betarts02[i] = betar02[i]/betarse02[i]} coeffs01 <- matrix(nrow=nuexpvar,ncol=4) rownames(coeffs01) <- expvarnames colnames(coeffs01) <- c("coeff","s.e.","t-stat","MSE risk") for (i in 1:nuexpvar) {coeffs01[i,1] = betar01[i]} for (i in 1:nuexpvar) {coeffs01[i,2] = betarse01[i]} for (i in 1:nuexpvar) {coeffs01[i,3] = betarts01[i]} for (i in 1:nuexpvar) {coeffs01[i,4] = ridge01$rmse[min(wheremin01[]),i]} coeffs02 <- matrix(nrow=nuexpvar,ncol=4) rownames(coeffs02) <- expvarnames colnames(coeffs02) <- c("coeff","s.e.","t-stat","MSE risk") for (i in 1:nuexpvar) {coeffs02[i,1] = betar02[i]} for (i in 1:nuexpvar) {coeffs02[i,2] = betarse02[i]} for (i in 1:nuexpvar) {coeffs02[i,3] = betarts02[i]} for (i in 1:nuexpvar) {coeffs02[i,4] = ridge02$rmse[min(wheremin02[]),i]} # set up coefficient table for OLS comparison betaols <- ridge01$coef[1,] varols <-(1/(nuobs-nuexpvar))*(t(yvec-xmat%*%betaols)%*%(yvec-xmat%*%betaols)) covols <- varols[1,1]*solve(xtx) rssols <- t(yvec-xmat%*%betaols)%*%(yvec-xmat%*%betaols) tssols <- t(yvec)%*%yvec r2ols <- 1-(rssols[1,1]/tssols[1,1]) fstatols <- (r2ols/(1-r2ols))*((nuobs-nuexpvar)/(nuexpvar-1)) adjr2ols <- 1-((1-r2ols)*((nuobs-1)/(nuobs-nuexpvar))) betaolsse <- c(rep(NA,times=nuexpvar)) betaolsts <- c(rep(NA,times=nuexpvar)) for (i in 1:nuexpvar) {betaolsse[i] = sqrt(covols[i,i])} for (i in 1:nuexpvar) {betaolsts[i] = betaols[i]/betaolsse[i]} coeffsols <- matrix(nrow=nuexpvar,ncol=4) rownames(coeffsols) <- expvarnames colnames(coeffsols) <- c("coeff","s.e.","t-stat","MSE risk") for (i in 1:nuexpvar) {coeffsols[i,1] = betaols[i]} for (i in 1:nuexpvar) {coeffsols[i,2] = betaolsse[i]} for (i in 1:nuexpvar) {coeffsols[i,3] = betaolsts[i]} for (i in 1:nuexpvar) {coeffsols[i,4] = ridge01$rmse[1,i]} # print it all out! cat(" \n") cat("------------------------------------------------------------------ \n") cat(" \n") cat("OLS regression results\n") cat(" \n") cat("dependent variable:",yvarname,"\n") cat(" \n") cat("coeffs, se, t-stats and Obenchain's MSE risk: \n") cat(" \n") print(coeffsols) cat(" \n") cat("residual std err :",sqrt(varols[1,1]),"\n") cat("std dev of dep var:",sd(yvec),"\n") cat(" \n") cat("F-stat:",fstatols,"\n") cat("R-squared value:",r2ols,"\n") cat("Adj R-sq. value:",adjr2ols,"\n") cat(" \n") cat("number of explanatory variables:",ridge01$p,"\n") cat("number of observations:",ridge01$n,"\n") cat(" \n") cat("------------------------------------------------------------------ \n") cat(" \n") cat("single parameter ridge regression results:\n") cat(" \n") cat("dependent variable:",yvarname,"\n") cat(" \n") cat("coeffs, se (Hines etal method), t-stats and Obenchain's MSE risk: \n") cat(" \n") print(coeffs01) cat(" \n") cat("residual std err :",sqrt(varr01[1,1]),"\n") cat("std dev of dep var:",sd(yvec),"\n") cat(" \n") cat("F-stat:",fstatr01,"\n") cat("R-squared value:",r2r01,"\n") cat("Adj R-sq. value:",adjr2r01,"\n") cat(" \n") cat("number of observations:",ridge01$n,"\n") cat("number of explanatory variables:",ridge01$p,"\n") cat("multicollinearity allowance:",choosek01[min(wheremin01[]),2],"\n") cat(" \n") cat("shape parameter Q used:",ridge01$qp," Note: Q=0 in Hoerl-Kennard ridge\n") cat("value of Q most likely to be optimal:",ridge01$qmse,"\n") cat("value of k which minimizes minus-two-log-likelihood:",chosenk01,"\n") cat(" \n") cat("------------------------------------------------------------------ \n") cat(" \n") cat("2-parameter (Obenchain) ridge regression results:\n") cat(" \n") cat("dependent variable:",yvarname,"\n") cat(" \n") cat("coeffs, se (Hines etal method), t-stats and Obenchain's MSE risk: \n") cat(" \n") print(coeffs02) cat(" \n") cat("residual std err :",sqrt(varr02[1,1]),"\n") cat("std dev of dep var:",sd(yvec),"\n") cat(" \n") cat("F-stat:",fstatr02,"\n") cat("R-squared value:",r2r02,"\n") cat("Adj R-sq. value:",adjr2r02,"\n") cat(" \n") cat("number of observations:",ridge02$n,"\n") cat("number of explanatory variables:",ridge02$p,"\n") cat("multicollinearity allowance:",choosek02[min(wheremin02[]),2],"\n") cat(" \n") cat("shape parameter Q used:",ridge02$qp," Note: Q=0 in Hoerl-Kennard ridge\n") cat("value of Q most likely to be optimal:",ridge02$qmse,"\n") cat("value of k which minimizes minus-two-log-likelihood:",chosenk02,"\n") cat(" \n") cat("------------------------------------------------------------------ \n") cat(" \n")