# eAppendix 3 ## Set 3 G-formula: individual intercepts and time-varying variables ## ## Code written by Maarten J. Bijlsma ## ## 5 September, 2018 ## ## Max Planck Institute for Demographic Research, Germany ## # Please note that this code is annotated for your convenience # # but has not been optimized! # # load relevant libraries library(data.table) library(plm) # load data load("U:/... .RData") # a lag function that takes into account individual IDs lag1.func <- function(variabletolag, idvariable) { c(NA,ifelse(idvariable[2:length(idvariable)] == idvariable[1:(length(idvariable)-1)], variabletolag[1:(length(idvariable)-1)],NA)) } # linear probability models can make predicitons outside the 0-1 range # this needs to be constrained linprob.constrain <- function(a) { a <- ifelse(a < 0,0,a) a <- ifelse(a > 1,1,a) return(a) } # predict functions for different variable types # taking into account individual-level intercepts # multinomial models use a set of logistic models for the probability of # each possible category: with linear probability models these should sum to 1 # though due to numerical integration may end up close to 1 rather than exactly 1 # for this reason we model each possible category, rather than the number of categories minus 1 # another approach has to be taken if the models for each outcome are # non-linear (e.g. if logistic regression models are used) fe.lp.mn.predict.6var <- function(predict.x1,predict.x2,predict.x3,predict.x4,predict.x5,predict.x6, fixef.x1,fixef.x2,fixef.x3,fixef.x4,fixef.x5,fixef.x6, dataset) { # get the names of the coefficients names.vec <- names(predict.x1) # isolate the names of just main terms # and the names of terms that are interacted names.main <- names.vec[-grep(":",names.vec)] names.int <- names.vec[grep(":",names.vec)] # save part of the dataset for main terms data.main <- as.matrix(dataset[,names.main]) # make dataset for interactions int.terms <- unlist(strsplit(names.int,":")) # odd numbers are first terms, even are second terms pre.index <- seq(1,length(int.terms)-1,2) post.index <- seq(2,length(int.terms),2) # it will always be an even number, unless three-fold+ interactions exist data.int <- dataset[int.terms[pre.index]] * dataset[int.terms[post.index]] data.int <- as.matrix(data.int) # and now put everything back together x1 <- data.main %*% predict.x1[names.main] + data.int %*% predict.x1[names.int] + fixef.x1 x2 <- data.main %*% predict.x2[names.main] + data.int %*% predict.x2[names.int] + fixef.x2 x3 <- data.main %*% predict.x3[names.main] + data.int %*% predict.x3[names.int] + fixef.x3 x4 <- data.main %*% predict.x4[names.main] + data.int %*% predict.x4[names.int] + fixef.x4 x5 <- data.main %*% predict.x5[names.main] + data.int %*% predict.x5[names.int] + fixef.x5 x6 <- data.main %*% predict.x6[names.main] + data.int %*% predict.x6[names.int] + fixef.x6 x1 <- linprob.constrain(x1) x2 <- linprob.constrain(x2) x3 <- linprob.constrain(x3) x4 <- linprob.constrain(x4) x5 <- linprob.constrain(x5) x6 <- linprob.constrain(x6) # find rescale value rescaler <- 1/(x1+x2+x3+x4+x5+x6) # rescale constituent variables x1r <- x1*rescaler x2r <- x2*rescaler x3r <- x3*rescaler x4r <- x4*rescaler x5r <- x5*rescaler # x6r does not need to be determined # determine boundary values x1b <- x1r x2b <- x1r + x2r x3b <- x1r + x2r + x3r x4b <- x1r + x2r + x3r + x4r x5b <- x1r + x2r + x3r + x4r + x5r # x6b does not need to be determined # determine n n2 <- length(x1) # draw random variables decider <- runif(n2,0,1) # determine category x1c <- ifelse(decider <= x1b,1,0) x2c <- ifelse(decider > x1b & decider <= x2b,1,0) x3c <- ifelse(decider > x2b & decider <= x3b,1,0) x4c <- ifelse(decider > x3b & decider <= x4b,1,0) x5c <- ifelse(decider > x4b & decider <= x5b,1,0) x6c <- ifelse(decider > x5b,1,0) return(cbind(x1c,x2c,x3c,x4c,x5c,x6c)) } fe.lp.mn.predict.5var <- function(predict.x1,predict.x2,predict.x3,predict.x4,predict.x5, fixef.x1,fixef.x2,fixef.x3,fixef.x4,fixef.x5, dataset) { # get the names of the coefficients names.vec <- names(predict.x1) # isolate the names of just main terms # and the names of terms that are interacted names.main <- names.vec[-grep(":",names.vec)] names.int <- names.vec[grep(":",names.vec)] # save part of the dataset for main terms data.main <- as.matrix(dataset[,names.main]) # make dataset for interactions int.terms <- unlist(strsplit(names.int,":")) # odd numbers are first terms, even are second terms pre.index <- seq(1,length(int.terms)-1,2) post.index <- seq(2,length(int.terms),2) # it will always be an even number, unless three-fold+ interactions exist data.int <- dataset[int.terms[pre.index]] * dataset[int.terms[post.index]] data.int <- as.matrix(data.int) # and now put everything back together x1 <- data.main %*% predict.x1[names.main] + data.int %*% predict.x1[names.int] + fixef.x1 x2 <- data.main %*% predict.x2[names.main] + data.int %*% predict.x2[names.int] + fixef.x2 x3 <- data.main %*% predict.x3[names.main] + data.int %*% predict.x3[names.int] + fixef.x3 x4 <- data.main %*% predict.x4[names.main] + data.int %*% predict.x4[names.int] + fixef.x4 x5 <- data.main %*% predict.x5[names.main] + data.int %*% predict.x5[names.int] + fixef.x5 x1 <- linprob.constrain(x1) x2 <- linprob.constrain(x2) x3 <- linprob.constrain(x3) x4 <- linprob.constrain(x4) x5 <- linprob.constrain(x5) # find rescale value rescaler <- 1/(x1+x2+x3+x4+x5) # rescale constituent variables x1r <- x1*rescaler x2r <- x2*rescaler x3r <- x3*rescaler x4r <- x4*rescaler # x5r does not need to be determined # determine boundary values x1b <- x1r x2b <- x1r + x2r x3b <- x1r + x2r + x3r x4b <- x1r + x2r + x3r + x4r # x5b does not need to be determined # determine n n2 <- length(x1) # draw random variables decider <- runif(n2,0,1) # determine category x1c <- ifelse(decider <= x1b,1,0) x2c <- ifelse(decider > x1b & decider <= x2b,1,0) x3c <- ifelse(decider > x2b & decider <= x3b,1,0) x4c <- ifelse(decider > x3b & decider <= x4b,1,0) x5c <- ifelse(decider > x4b,1,0) return(cbind(x1c,x2c,x3c,x4c,x5c)) } fe.lp.mn.predict.4var <- function(predict.x1,predict.x2,predict.x3,predict.x4, fixef.x1,fixef.x2,fixef.x3,fixef.x4, dataset) { # get the names of the coefficients names.vec <- names(predict.x1) # isolate the names of just main terms # and the names of terms that are interacted names.main <- names.vec[-grep(":",names.vec)] names.int <- names.vec[grep(":",names.vec)] # save part of the dataset for main terms data.main <- as.matrix(dataset[,names.main]) # make dataset for interactions int.terms <- unlist(strsplit(names.int,":")) # odd numbers are first terms, even are second terms pre.index <- seq(1,length(int.terms)-1,2) post.index <- seq(2,length(int.terms),2) # it will always be an even number, unless three-fold+ interactions exist data.int <- dataset[int.terms[pre.index]] * dataset[int.terms[post.index]] data.int <- as.matrix(data.int) # and now put everything back together x1 <- data.main %*% predict.x1[names.main] + data.int %*% predict.x1[names.int] + fixef.x1 x2 <- data.main %*% predict.x2[names.main] + data.int %*% predict.x2[names.int] + fixef.x2 x3 <- data.main %*% predict.x3[names.main] + data.int %*% predict.x3[names.int] + fixef.x3 x4 <- data.main %*% predict.x4[names.main] + data.int %*% predict.x4[names.int] + fixef.x4 x1 <- linprob.constrain(x1) x2 <- linprob.constrain(x2) x3 <- linprob.constrain(x3) x4 <- linprob.constrain(x4) # find rescale value rescaler <- 1/(x1+x2+x3+x4) # rescale constituent variables x1r <- x1*rescaler x2r <- x2*rescaler x3r <- x3*rescaler # x4r does not need to be determined # determine boundary values x1b <- x1r x2b <- x1r + x2r x3b <- x1r + x2r + x3r # x4b does not need to be determined # determine n n2 <- length(x1) # draw random variables decider <- runif(n2,0,1) # determine category x1c <- ifelse(decider <= x1b,1,0) x2c <- ifelse(decider > x1b & decider <= x2b,1,0) x3c <- ifelse(decider > x2b & decider <= x3b,1,0) x4c <- ifelse(decider > x3b,1,0) return(cbind(x1c,x2c,x3c,x4c)) } multinomial.predict.4var <- function(predict.x1,predict.x2,predict.x3,predict.x4, dataset) { x1 <- predict(predict.x1,dataset,type='response') x2 <- predict(predict.x2,dataset,type='response') x3 <- predict(predict.x3,dataset,type='response') x4 <- predict(predict.x4,dataset,type='response') x1 <- linprob.constrain(x1) x2 <- linprob.constrain(x2) x3 <- linprob.constrain(x3) x4 <- linprob.constrain(x4) # find rescale value rescaler <- 1/(x1+x2+x3+x4) # rescale constituent variables x1r <- x1*rescaler x2r <- x2*rescaler x3r <- x3*rescaler # x4r does not need to be determined # determine boundary values x1b <- x1r x2b <- x1r + x2r x3b <- x1r + x2r + x3r # x4b does not need to be determined # determine n n2 <- length(x1) # draw random variables decider <- runif(n2,0,1) # determine category x1c <- ifelse(decider <= x1b,1,0) x2c <- ifelse(decider > x1b & decider <= x2b,1,0) x3c <- ifelse(decider > x2b & decider <= x3b,1,0) x4c <- ifelse(decider > x3b,1,0) return(cbind(x1c,x2c,x3c,x4c)) } multinomial.predict.4var <- function(predict.x1,predict.x2,predict.x3,predict.x4, dataset) { x1 <- predict(predict.x1,dataset,type='response') x2 <- predict(predict.x2,dataset,type='response') x3 <- predict(predict.x3,dataset,type='response') x4 <- predict(predict.x4,dataset,type='response') x1 <- linprob.constrain(x1) x2 <- linprob.constrain(x2) x3 <- linprob.constrain(x3) x4 <- linprob.constrain(x4) # find rescale value rescaler <- 1/(x1+x2+x3+x4) # rescale constituent variables x1r <- x1*rescaler x2r <- x2*rescaler x3r <- x3*rescaler # x4r does not need to be determined # determine boundary values x1b <- x1r x2b <- x1r + x2r x3b <- x1r + x2r + x3r # x4b does not need to be determined # determine n n2 <- length(x1) # draw random variables decider <- runif(n2,0,1) # determine category x1c <- ifelse(decider <= x1b,1,0) x2c <- ifelse(decider > x1b & decider <= x2b,1,0) x3c <- ifelse(decider > x2b & decider <= x3b,1,0) x4c <- ifelse(decider > x3b,1,0) return(cbind(x1c,x2c,x3c,x4c)) } fe.lp.bn.predict.1var <- function(predict.x1,fixef.x1,dataset) { # get the names of the coefficients names.vec <- names(predict.x1) # isolate the names of just main terms # and the names of terms that are interacted names.main <- names.vec[-grep(":",names.vec)] names.int <- names.vec[grep(":",names.vec)] # save part of the dataset for main terms data.main <- as.matrix(dataset[,names.main]) # make dataset for interactions int.terms <- unlist(strsplit(names.int,":")) # odd numbers are first terms, even are second terms pre.index <- seq(1,length(int.terms)-1,2) post.index <- seq(2,length(int.terms),2) # it will always be an even number, unless three-fold+ interactions exist data.int <- dataset[int.terms[pre.index]] * dataset[int.terms[post.index]] data.int <- as.matrix(data.int) # and now put everything back together x1 <- data.main %*% predict.x1[names.main] + data.int %*% predict.x1[names.int] + fixef.x1 x1 <- linprob.constrain(x1) n2 <- length(x1) return(rbinom(n2,1,x1)) } fe.linpredvar <- function(predict.x1,fixef.x1,resSD,dataset) { # get the names of the coefficients names.vec <- names(predict.x1) # isolate the names of just main terms # and the names of terms that are interacted names.main <- names.vec[-grep(":",names.vec)] names.int <- names.vec[grep(":",names.vec)] # save part of the dataset for main terms data.main <- as.matrix(dataset[,names.main]) # make dataset for interactions int.terms <- unlist(strsplit(names.int,":")) # odd numbers are first terms, even are second terms pre.index <- seq(1,length(int.terms)-1,2) post.index <- seq(2,length(int.terms),2) # it will always be an even number, unless three-fold+ interactions exist data.int <- dataset[int.terms[pre.index]] * dataset[int.terms[post.index]] data.int <- as.matrix(data.int) # and now put everything back together x1 <- data.main %*% predict.x1[names.main] + data.int %*% predict.x1[names.int] + fixef.x1 n2 <- length(x1) x1 <- x1+rnorm(n2,mean=0,sd=resSD) return(x1) } # the long.sample.fe function differs from long.sample.fe # in that it requires the originaldata to also have a column 'newID' # which can initially be filled with NA's or so # but are later filled with artificial IDs so that we get # estimated fixed effects for re-sampled copies long.sample.fe <- function(originaldata, originaldataid, idtotfut) { # select a bunch of IDs IDs <- unique(originaldataid) y <- sample(IDs,length(IDs),replace=T) z <- table(table(y)) # from there, select a group once selectID <- sample(IDs,size=z[1],replace=F) select.idtotfut <- idtotfut[which(idtotfut$id %in% selectID),] newdata <- originaldata[which(originaldataid %in% selectID),] # assign first batch of new IDs to this group newID <- 1:z[1] newdata$newID <- rep(newID,select.idtotfut$totfut) # note this requires select.idtotfut and newdata to be ordered # in the same way by ID if(length(z) > 1) { for(i in 2:length(z)) { # select a new group of (real) IDs that was not yet selected IDs2 <- setdiff(IDs,selectID) # from there, randomly select a group of people of the right size selectID2 <- sample(IDs2,size=z[i],replace=F) selectID <- c(selectID,selectID2) # so we don't re-select the newly # selected people either # and we need to know how many obs the new people have select.idtotfut <- idtotfut[which(idtotfut$id %in% selectID2),] for(j in 1:i) { # copy the new dataset i number of times and assign new ids newID <- (newID[length(newID)]+1):(newID[length(newID)]+z[i]) copy <- originaldata[which(originaldataid %in% selectID2),] copy$newID <- rep(newID,select.idtotfut$totfut) newdata <- rbind(newdata,copy) } } return(newdata) } } # make model formulas # note: time constant variables are not needed here # due to individual-level fixed effects formula.outcome <- c("outcome ~ # time constant variables # removed age.3539 + age.4044 + age.4549 + age.5055 + (female + # male as ref edu.pri + edu.sec + edu.terhi) * # edu.terlowst as ref # time varying variables (l.atv.unemp + l.atv.stud + l.atv.ret) + l.fam.single + l.fam.coh.nc + l.fam.coh.ch + l.fam.mar.nc + l.fam.mar.ch + # l.fam.child as ref l.atc.J01 + l.atc.N02A + l.atc.N02B + l.atc.N05 + l.atc.N06 + l.atc.G03 + l.atc.R06 + l.atc.R03 + l.atc.C07 + l.atc.C09 + l.atc.P01 + l.inc.tax.if + l.inc.hh.if.cu + l.AD") formula.outcome.inc.hh <- c("outcome ~ # time constant variables # removed age.3539 + age.4044 + age.4549 + age.5055 + (female + # male as ref edu.pri + edu.sec + edu.terhi) * # edu.terlowst as ref # time varying variables (l.atv.unemp + l.atv.stud + l.atv.ret) + l.fam.single + l.fam.coh.nc + l.fam.coh.ch + l.fam.mar.nc + l.fam.mar.ch + # l.fam.child as ref l.atc.J01 + l.atc.N02A + l.atc.N02B + l.atc.N05 + l.atc.N06 + l.atc.G03 + l.atc.R06 + l.atc.R03 + l.atc.C07 + l.atc.C09 + l.atc.P01 + l.inc.tax.if + l.inc.hh.if.cu + inc.tax.if + l.AD") formula.atv.emp <- as.formula(sub('outcome','atv.emp',formula.outcome)) formula.atv.unemp <- as.formula(sub('outcome','atv.unemp',formula.outcome)) formula.atv.stud <- as.formula(sub('outcome','atv.stud',formula.outcome)) formula.atv.ret <- as.formula(sub('outcome','atv.ret',formula.outcome)) formula.fam.single <- as.formula(sub('outcome','fam.single',formula.outcome)) formula.fam.coh.nc <- as.formula(sub('outcome','fam.coh.nc',formula.outcome)) formula.fam.coh.ch <- as.formula(sub('outcome','fam.coh.ch',formula.outcome)) formula.fam.mar.nc <- as.formula(sub('outcome','fam.mar.nc',formula.outcome)) formula.fam.mar.ch <- as.formula(sub('outcome','fam.mar.ch',formula.outcome)) formula.fam.child <- as.formula(sub('outcome','fam.child',formula.outcome)) formula.atc.J01 <- as.formula(sub('outcome','atc.J01',formula.outcome)) formula.atc.N02A <- as.formula(sub('outcome','atc.N02A',formula.outcome)) formula.atc.N02B <- as.formula(sub('outcome','atc.N02B',formula.outcome)) formula.atc.N05 <- as.formula(sub('outcome','atc.N05',formula.outcome)) formula.atc.N06 <- as.formula(sub('outcome','atc.N06',formula.outcome)) formula.atc.G03 <- as.formula(sub('outcome','atc.G03',formula.outcome)) formula.atc.R06 <- as.formula(sub('outcome','atc.R06',formula.outcome)) formula.atc.R03 <- as.formula(sub('outcome','atc.R03',formula.outcome)) formula.atc.C07 <- as.formula(sub('outcome','atc.C07',formula.outcome)) formula.atc.C09 <- as.formula(sub('outcome','atc.C09',formula.outcome)) formula.atc.P01 <- as.formula(sub('outcome','atc.P01',formula.outcome)) formula.inc.tax.if <- as.formula(sub('outcome','inc.tax.if',formula.outcome)) formula.inc.hh.if.cu <- as.formula(sub('outcome','inc.hh.if.cu',formula.outcome.inc.hh)) formula.AD <- as.formula(sub('outcome','AD',formula.outcome)) # save column location info col.index.from <- NULL col.index.from[1] <- grep(paste0('^','atv.emp','$'), colnames(cc.dat)) col.index.from[2] <- grep(paste0('^','atv.unemp','$'), colnames(cc.dat)) col.index.from[3] <- grep(paste0('^','atv.stud','$'), colnames(cc.dat)) col.index.from[4] <- grep(paste0('^','atv.ret','$'), colnames(cc.dat)) col.index.from[5] <- grep(paste0('^','atv.unk','$'), colnames(cc.dat)) col.index.from[6] <- grep(paste0('^','fam.single','$'), colnames(cc.dat)) col.index.from[7] <- grep(paste0('^','fam.coh.nc','$'), colnames(cc.dat)) col.index.from[8] <- grep(paste0('^','fam.coh.ch','$'), colnames(cc.dat)) col.index.from[9] <- grep(paste0('^','fam.mar.nc','$'), colnames(cc.dat)) col.index.from[10] <- grep(paste0('^','fam.mar.ch','$'), colnames(cc.dat)) col.index.from[11] <- grep(paste0('^','fam.child','$'), colnames(cc.dat)) col.index.from[12] <- grep(paste0('^','atc.J01','$'), colnames(cc.dat)) col.index.from[13] <- grep(paste0('^','atc.N02A','$'), colnames(cc.dat)) col.index.from[14] <- grep(paste0('^','atc.N02B','$'), colnames(cc.dat)) col.index.from[15] <- grep(paste0('^','atc.N05','$'), colnames(cc.dat)) col.index.from[16] <- grep(paste0('^','atc.N06','$'), colnames(cc.dat)) col.index.from[17] <- grep(paste0('^','atc.G03','$'), colnames(cc.dat)) col.index.from[18] <- grep(paste0('^','atc.R06','$'), colnames(cc.dat)) col.index.from[19] <- grep(paste0('^','atc.R03','$'), colnames(cc.dat)) col.index.from[20] <- grep(paste0('^','atc.C07','$'), colnames(cc.dat)) col.index.from[21] <- grep(paste0('^','atc.C09','$'), colnames(cc.dat)) col.index.from[22] <- grep(paste0('^','atc.P01','$'), colnames(cc.dat)) col.index.from[23] <- grep(paste0('^','inc.tax.if','$'), colnames(cc.dat)) col.index.from[24] <- grep(paste0('^','inc.hh.if.cu','$'), colnames(cc.dat)) col.index.from[25] <- grep(paste0('^','AD','$'), colnames(cc.dat)) col.index.to <- NULL col.index.to[1] <- grep(paste0('^','l.atv.emp','$'), colnames(cc.dat)) col.index.to[2] <- grep(paste0('^','l.atv.unemp','$'), colnames(cc.dat)) col.index.to[3] <- grep(paste0('^','l.atv.stud','$'), colnames(cc.dat)) col.index.to[4] <- grep(paste0('^','l.atv.ret','$'), colnames(cc.dat)) col.index.to[5] <- grep(paste0('^','l.atv.unk','$'), colnames(cc.dat)) col.index.to[6] <- grep(paste0('^','l.fam.single','$'), colnames(cc.dat)) col.index.to[7] <- grep(paste0('^','l.fam.coh.nc','$'), colnames(cc.dat)) col.index.to[8] <- grep(paste0('^','l.fam.coh.ch','$'), colnames(cc.dat)) col.index.to[9] <- grep(paste0('^','l.fam.mar.nc','$'), colnames(cc.dat)) col.index.to[10] <- grep(paste0('^','l.fam.mar.ch','$'), colnames(cc.dat)) col.index.to[11] <- grep(paste0('^','l.fam.child','$'), colnames(cc.dat)) col.index.to[12] <- grep(paste0('^','l.atc.J01','$'), colnames(cc.dat)) col.index.to[13] <- grep(paste0('^','l.atc.N02A','$'), colnames(cc.dat)) col.index.to[14] <- grep(paste0('^','l.atc.N02B','$'), colnames(cc.dat)) col.index.to[15] <- grep(paste0('^','l.atc.N05','$'), colnames(cc.dat)) col.index.to[16] <- grep(paste0('^','l.atc.N06','$'), colnames(cc.dat)) col.index.to[17] <- grep(paste0('^','l.atc.G03','$'), colnames(cc.dat)) col.index.to[18] <- grep(paste0('^','l.atc.R06','$'), colnames(cc.dat)) col.index.to[19] <- grep(paste0('^','l.atc.R03','$'), colnames(cc.dat)) col.index.to[20] <- grep(paste0('^','l.atc.C07','$'), colnames(cc.dat)) col.index.to[21] <- grep(paste0('^','l.atc.C09','$'), colnames(cc.dat)) col.index.to[22] <- grep(paste0('^','l.atc.P01','$'), colnames(cc.dat)) col.index.to[23] <- grep(paste0('^','l.inc.tax.if','$'), colnames(cc.dat)) col.index.to[24] <- grep(paste0('^','l.inc.hh.if.cu','$'), colnames(cc.dat)) col.index.to[25] <- grep(paste0('^','l.AD','$'), colnames(cc.dat)) # the order of the above variables in the index.from and index.to # must be the same! # and of course, the length of both vectors must also be the same length(col.index.from) length(col.index.to) intersect(col.index.from,col.index.to) bssize <- 500 # number of bootstrap iterations nvar <- 24 # number of variables we want to save info on ncol <- 19 # numberof follow-up time units mits <- 10 # number of Monte Carlo Error reduction iterations # arrays for Monte Carlo Error reduction loop monte.mat.nc <- rep(NA,nvar*ncol*mits) monte.mat.int <- rep(NA,nvar*ncol*mits) dim(monte.mat.nc) <- c(nvar,ncol,mits) dim(monte.mat.int) <- c(nvar,ncol,mits) # arrays for final output output.mat.nc <- rep(NA,nvar*ncol*bssize) output.mat.int <- rep(NA,nvar*ncol*bssize) dim(output.mat.nc) <- c(nvar,ncol,bssize) dim(output.mat.int) <- c(nvar,ncol,bssize) # and the subgroup analyses monte.mat.nc.sub <- rep(NA,ncol*8*mits) # 8 since 2sexes*2edugroups*2vars(ADandUnemp) monte.mat.int.sub <- rep(NA,ncol*4*mits) # 4 since 2sexes*2edugroups*1var(AD) dim(monte.mat.nc.sub ) <- c(8,ncol,mits) dim(monte.mat.int.sub) <- c(4,ncol,mits) output.mat.nc.sub <- rep(NA,ncol*8*bssize) output.mat.int.sub <- rep(NA,ncol*4*bssize) dim(output.mat.nc.sub) <- c(8,ncol,bssize) dim(output.mat.int.sub) <- c(4,ncol,bssize) fixefs <- NULL t1 <- Sys.time() for(bs in 1:bssize) { # sample individuals from cc.dat g.sample <- long.sample.fe(cc.dat,cc.dat$id,id.totfut) # (re)fit models to g.sample # bsf = bootstrap fit # estimate relations fit.bsf.atv.emp <- plm(formula.atv.emp, effect='individual',model='within',index='newID',data=g.sample) fit.bsf.atv.unemp <- plm(formula.atv.unemp, effect='individual',model='within',index='newID',data=g.sample) fit.bsf.atv.stud <- plm(formula.atv.stud, effect='individual',model='within',index='newID',data=g.sample) fit.bsf.atv.ret <- plm(formula.atv.ret, effect='individual',model='within',index='newID',data=g.sample) fit.bsf.fam.single <- plm(formula.fam.single, effect='individual',model='within',index='newID',data=g.sample) fit.bsf.fam.coh.nc <- plm(formula.fam.coh.nc, effect='individual',model='within',index='newID',data=g.sample) fit.bsf.fam.coh.ch <- plm(formula.fam.coh.ch, effect='individual',model='within',index='newID',data=g.sample) fit.bsf.fam.mar.nc <- plm(formula.fam.mar.nc, effect='individual',model='within',index='newID',data=g.sample) fit.bsf.fam.mar.ch <- plm(formula.fam.mar.ch, effect='individual',model='within',index='newID',data=g.sample) fit.bsf.fam.child <- plm(formula.fam.child, effect='individual',model='within',index='newID',data=g.sample) fit.bsf.atc.J01 <- plm(formula.atc.J01, effect='individual',model='within',index='newID',data=g.sample) fit.bsf.atc.N02A <- plm(formula.atc.N02A, effect='individual',model='within',index='newID',data=g.sample) fit.bsf.atc.N02B <- plm(formula.atc.N02B, effect='individual',model='within',index='newID',data=g.sample) fit.bsf.atc.N05 <- plm(formula.atc.N05, effect='individual',model='within',index='newID',data=g.sample) fit.bsf.atc.N06 <- plm(formula.atc.N06, effect='individual',model='within',index='newID',data=g.sample) fit.bsf.atc.G03 <- plm(formula.atc.G03, effect='individual',model='within',index='newID',data=g.sample) fit.bsf.atc.R06 <- plm(formula.atc.R06, effect='individual',model='within',index='newID',data=g.sample) fit.bsf.atc.R03 <- plm(formula.atc.R03, effect='individual',model='within',index='newID',data=g.sample) fit.bsf.atc.C07 <- plm(formula.atc.C07, effect='individual',model='within',index='newID',data=g.sample) fit.bsf.atc.C09 <- plm(formula.atc.C09, effect='individual',model='within',index='newID',data=g.sample) fit.bsf.atc.P01 <- plm(formula.atc.P01, effect='individual',model='within',index='newID',data=g.sample) fit.bsf.inc.tax.if <- plm(formula.inc.tax.if, effect='individual',model='within',index='newID',data=g.sample) fit.bsf.inc.hh.if.cu <- plm(formula.inc.hh.if.cu, effect='individual',model='within',index='newID',data=g.sample) fit.bsf.inc.tax.if.SD <- sd(fit.bsf.inc.tax.if$residuals) fit.bsf.inc.hh.if.cu.SD <- sd(fit.bsf.inc.hh.if.cu$residuals) fit.bsf.AD <- plm(formula.AD, effect='individual',model='within',index='newID',data=g.sample) # now we need also the fixed effects for each individual # so we extract those here and use those later in the g-formula fixefs.atv <- NULL fixefs.atv$idnr <- unique(index(fit.bsf.atv.emp)$newID) fixefs.atv$atv.emp <- fixef(fit.bsf.atv.emp,type='level') fixefs.atv$atv.unemp <- fixef(fit.bsf.atv.unemp,type='level') fixefs.atv$atv.stud <- fixef(fit.bsf.atv.stud,type='level') fixefs.atv$atv.ret <- fixef(fit.bsf.atv.ret,type='level') fixefs.atv <- data.frame(fixefs.atv) fixefs.fam <- NULL fixefs.fam$idnr <- unique(index(fit.bsf.fam.single)$newID) fixefs.fam$fam.single <- fixef(fit.bsf.fam.single,type='level') fixefs.fam$fam.coh.nc <- fixef(fit.bsf.fam.coh.nc,type='level') fixefs.fam$fam.coh.ch <- fixef(fit.bsf.fam.coh.ch,type='level') fixefs.fam$fam.mar.nc <- fixef(fit.bsf.fam.mar.nc,type='level') fixefs.fam$fam.mar.ch <- fixef(fit.bsf.fam.mar.ch,type='level') fixefs.fam$fam.child <- fixef(fit.bsf.fam.child,type='level') fixefs.fam <- data.frame(fixefs.fam) fixefs.atc <- NULL fixefs.atc$idnr <- unique(index(fit.bsf.atc.J01)$newID) fixefs.atc$atc.J01 <- fixef(fit.bsf.atc.J01,type='level') fixefs.atc$atc.N02A <- fixef(fit.bsf.atc.N02A,type='level') fixefs.atc$atc.N02B <- fixef(fit.bsf.atc.N02B,type='level') fixefs.atc$atc.N05 <- fixef(fit.bsf.atc.N05,type='level') fixefs.atc$atc.N06 <- fixef(fit.bsf.atc.N06,type='level') fixefs.atc$atc.G03 <- fixef(fit.bsf.atc.G03,type='level') fixefs.atc$atc.R06 <- fixef(fit.bsf.atc.R06,type='level') fixefs.atc$atc.R03 <- fixef(fit.bsf.atc.R03,type='level') fixefs.atc$atc.C07 <- fixef(fit.bsf.atc.C07,type='level') fixefs.atc$atc.C09 <- fixef(fit.bsf.atc.C09,type='level') fixefs.atc$atc.P01 <- fixef(fit.bsf.atc.P01,type='level') fixefs.inc.tax <- NULL fixefs.inc.tax$idnr <- unique(index(fit.bsf.inc.tax.if)$newID) fixefs.inc.tax$inc.tax.if <- fixef(fit.bsf.inc.tax.if,type='level') fixefs.inc.tax <- data.frame(fixefs.inc.tax) fixefs.inc.hh <- NULL fixefs.inc.hh$idnr <- unique(index(fit.bsf.inc.hh.if.cu)$newID) fixefs.inc.hh$inc.hh.if.cu <- fixef(fit.bsf.inc.hh.if.cu,type='level') fixefs.inc.hh <- data.frame(fixefs.inc.hh) fixefs.atc$AD <- fixef(fit.bsf.AD,type='level') fixefs.atc <- data.frame(fixefs.atc) for(m in 1:mits) { ## this is the Monte Carlo Error reduction loop ## # I take the average over a few iterations # since each iteration has variation that is not due to # sampling variability, but due to probablistic predictions # and we are not interested in that type of error # take individuals at time 0 g.sample <- g.sample[g.sample$year==1995,] # and the same file, but then for the intervention loop g.sample.2 <- g.sample g.sample$group <- 0 ## NATURAL LOOP ## # start a loop that moves through the calendar years for(t in 1996:2013) { g.sample.temp <- g.sample[g.sample$year==(t-1),] g.sample.temp$year <- g.sample.temp$year+1 g.sample.temp$age <- g.sample.temp$age+1 g.sample.temp$fut <- g.sample.temp$fut+1 g.sample.temp$age.3539 <- ifelse(g.sample.temp$age >= 35 & g.sample.temp$age < 40,1,0) g.sample.temp$age.4044 <- ifelse(g.sample.temp$age >= 40 & g.sample.temp$age < 45,1,0) g.sample.temp$age.4549 <- ifelse(g.sample.temp$age >= 45 & g.sample.temp$age < 50,1,0) g.sample.temp$age.5055 <- ifelse(g.sample.temp$age >= 50,1,0) # lag values: since the new values for t have not yet been produced # I can actually take the column information from a row at t # and put it in the same row at t but then in a column meant for # lagged values # the first are the 'from' columns, and the second the 'to' columns # so: take values from the the 'from' column and put them in the 'to' column g.sample.temp[,col.index.to] <- g.sample.temp[,col.index.from] # put together with entire dataset g.sample <- rbind(g.sample,g.sample.temp) rm(g.sample.temp) # order by ID and then year within ID variable (needed for lags below) g.sample <- g.sample[order(g.sample$idnr,g.sample$year),] # predict atv (job) x <- fe.lp.mn.predict.4var(fit.bsf.atv.emp$coef,fit.bsf.atv.unemp$coef,fit.bsf.atv.stud$coef,fit.bsf.atv.ret$coef, fixefs.atv$atv.emp,fixefs.atv$atv.unemp,fixefs.atv$atv.stud,fixefs.atv$atv.ret, g.sample[g.sample$year==t,]) g.sample$atv.emp[g.sample$year==t] <- x[,1] g.sample$atv.unemp[g.sample$year==t] <- x[,2] g.sample$atv.stud[g.sample$year==t] <- x[,3] g.sample$atv.ret[g.sample$year==t] <- x[,4] rm(x) # predict partnership (fam) x <- fe.lp.mn.predict.6var(fit.bsf.fam.single$coef,fit.bsf.fam.coh.nc$coef,fit.bsf.fam.coh.ch$coef,fit.bsf.fam.mar.nc$coef,fit.bsf.fam.mar.ch$coef,fit.bsf.fam.child$coef, fixefs.fam$fam.single,fixefs.fam$fam.coh.nc,fixefs.fam$fam.coh.ch,fixefs.fam$fam.mar.nc,fixefs.fam$fam.mar.ch,fixefs.fam$fam.child, g.sample[g.sample$year==t,]) g.sample$fam.single[g.sample$year==t] <- x[,1] g.sample$fam.coh.nc[g.sample$year==t] <- x[,2] g.sample$fam.coh.ch[g.sample$year==t] <- x[,3] g.sample$fam.mar.nc[g.sample$year==t] <- x[,4] g.sample$fam.mar.ch[g.sample$year==t] <- x[,5] g.sample$fam.child[g.sample$year==t] <- x[,6] rm(x) # predict drug purchases (ATC) g.sample$atc.J01[g.sample$year==t] <- fe.lp.bn.predict.1var(fit.bsf.atc.J01$coef,fixefs.atc$atc.J01,g.sample[g.sample$year==t,]) g.sample$atc.N02A[g.sample$year==t] <- fe.lp.bn.predict.1var(fit.bsf.atc.N02A$coef,fixefs.atc$atc.N02A,g.sample[g.sample$year==t,]) g.sample$atc.N02B[g.sample$year==t] <- fe.lp.bn.predict.1var(fit.bsf.atc.N02B$coef,fixefs.atc$atc.N02B,g.sample[g.sample$year==t,]) g.sample$atc.N05[g.sample$year==t] <- fe.lp.bn.predict.1var(fit.bsf.atc.N05$coef,fixefs.atc$atc.N05,g.sample[g.sample$year==t,]) g.sample$atc.N06[g.sample$year==t] <- fe.lp.bn.predict.1var(fit.bsf.atc.N06$coef,fixefs.atc$atc.N06,g.sample[g.sample$year==t,]) g.sample$atc.G03[g.sample$year==t] <- fe.lp.bn.predict.1var(fit.bsf.atc.G03$coef,fixefs.atc$atc.G03,g.sample[g.sample$year==t,]) g.sample$atc.R06[g.sample$year==t] <- fe.lp.bn.predict.1var(fit.bsf.atc.R06$coef,fixefs.atc$atc.R06,g.sample[g.sample$year==t,]) g.sample$atc.R03[g.sample$year==t] <- fe.lp.bn.predict.1var(fit.bsf.atc.R03$coef,fixefs.atc$atc.R03,g.sample[g.sample$year==t,]) g.sample$atc.C07[g.sample$year==t] <- fe.lp.bn.predict.1var(fit.bsf.atc.C07$coef,fixefs.atc$atc.C07,g.sample[g.sample$year==t,]) g.sample$atc.C09[g.sample$year==t] <- fe.lp.bn.predict.1var(fit.bsf.atc.C09$coef,fixefs.atc$atc.C09,g.sample[g.sample$year==t,]) g.sample$atc.P01[g.sample$year==t] <- fe.lp.bn.predict.1var(fit.bsf.atc.P01$coef,fixefs.atc$atc.P01,g.sample[g.sample$year==t,]) ## predict income g.sample$inc.tax.if[g.sample$year==t] <- fe.linpredvar(fit.bsf.inc.tax.if$coef,fixefs.inc.tax$inc.tax.if,fit.bsf.inc.tax.if.SD,g.sample[g.sample$year==t,]) g.sample$inc.hh.if.cu[g.sample$year==t] <- fe.linpredvar(fit.bsf.inc.hh.if.cu$coef,fixefs.inc.hh$inc.hh.if.cu,fit.bsf.inc.hh.if.cu.SD,g.sample[g.sample$year==t,]) # predict AD prescribing g.sample$AD[g.sample$year==t] <- fe.lp.bn.predict.1var(fit.bsf.AD$coef,fixefs.atc$AD,g.sample[g.sample$year==t,]) # end loop } # SUBGROUP ANALYSIS PART 1 # save names of those belonging to the sex * education groups # in the Natural Course # only look to the last year of follow-up # identify last observation of an individual: g.sample.lastyear <- g.sample[g.sample$idnr != shift(g.sample$idnr,1,type='lead'),] id.male.second <- unique(g.sample.lastyear$idnr[g.sample.lastyear$female==0 & (g.sample.lastyear$edu.pri==1 | g.sample.lastyear$edu.sec==1)]) id.male.tert <- unique(g.sample.lastyear$idnr[g.sample.lastyear$female==0 & (g.sample.lastyear$edu.terlowst==1 | g.sample.lastyear$edu.terlow==1 | g.sample.lastyear$edu.terhi==1)]) id.female.second <- unique(g.sample.lastyear$idnr[g.sample.lastyear$female==1 & (g.sample.lastyear$edu.pri==1 | g.sample.lastyear$edu.sec==1)]) id.female.tert <- unique(g.sample.lastyear$idnr[g.sample.lastyear$female==1 & (g.sample.lastyear$edu.terlowst==1 | g.sample.lastyear$edu.terlow==1 | g.sample.lastyear$edu.terhi==1)]) rm(g.sample.lastyear) # intervention ! (part 1/2) # make sure anyone who is unemployed at fut==1 is set to employed # and then set their unemployment to 0 g.sample.2$atv.emp[g.sample.2$atv.unemp==1] <- 1 g.sample.2$atv.unemp[g.sample.2$atv.unemp==1] <- 0 for(t in 1996:2013) { g.sample.2.temp <- g.sample.2[g.sample.2$year==(t-1),] g.sample.2.temp$year <- g.sample.2.temp$year+1 g.sample.2.temp$age <- g.sample.2.temp$age+1 g.sample.2.temp$fut <- g.sample.2.temp$fut+1 g.sample.2.temp$age.3539 <- ifelse(g.sample.2.temp$age >= 35 & g.sample.2.temp$age < 40,1,0) g.sample.2.temp$age.4044 <- ifelse(g.sample.2.temp$age >= 40 & g.sample.2.temp$age < 45,1,0) g.sample.2.temp$age.4549 <- ifelse(g.sample.2.temp$age >= 45 & g.sample.2.temp$age < 50,1,0) g.sample.2.temp$age.5055 <- ifelse(g.sample.2.temp$age >= 50,1,0) # lag values: since the new values for t have not yet been produced # I can actually take the column information from a row at t # and put it in the same row at t but then in a column meant for # lagged values # the first are the 'from' columns, and the second the 'to' columns # so: take values from the the 'from' column and put them in the 'to' column g.sample.2.temp[,col.index.to] <- g.sample.2.temp[,col.index.from] # put together with entire dataset g.sample.2 <- rbind(g.sample.2,g.sample.2.temp) rm(g.sample.2.temp) # order by ID variable (needed for lags below) g.sample.2 <- g.sample.2[order(g.sample.2$idnr,g.sample.2$year),] ## predict functions here # predict atv (job) x <- fe.lp.mn.predict.4var(fit.bsf.atv.emp$coef,fit.bsf.atv.unemp$coef,fit.bsf.atv.stud$coef,fit.bsf.atv.ret$coef, fixefs.atv$atv.emp,fixefs.atv$atv.unemp,fixefs.atv$atv.stud,fixefs.atv$atv.ret, g.sample.2[g.sample.2$year==t,]) g.sample.2$atv.emp[g.sample.2$year==t] <- x[,1] g.sample.2$atv.unemp[g.sample.2$year==t] <- x[,2] g.sample.2$atv.stud[g.sample.2$year==t] <- x[,3] g.sample.2$atv.ret[g.sample.2$year==t] <- x[,4] rm(x) # ! intervene (part 2/2): give people a job if they're unemployed g.sample.2$atv.emp[g.sample.2$atv.unemp==1] <- 1 g.sample.2$atv.unemp[g.sample.2$atv.unemp==1] <- 0 # predict partnership (fam) x <- fe.lp.mn.predict.6var(fit.bsf.fam.single$coef,fit.bsf.fam.coh.nc$coef,fit.bsf.fam.coh.ch$coef,fit.bsf.fam.mar.nc$coef,fit.bsf.fam.mar.ch$coef,fit.bsf.fam.child$coef, fixefs.fam$fam.single,fixefs.fam$fam.coh.nc,fixefs.fam$fam.coh.ch,fixefs.fam$fam.mar.nc,fixefs.fam$fam.mar.ch,fixefs.fam$fam.child, g.sample.2[g.sample.2$year==t,]) g.sample.2$fam.single[g.sample.2$year==t] <- x[,1] g.sample.2$fam.coh.nc[g.sample.2$year==t] <- x[,2] g.sample.2$fam.coh.ch[g.sample.2$year==t] <- x[,3] g.sample.2$fam.mar.nc[g.sample.2$year==t] <- x[,4] g.sample.2$fam.mar.ch[g.sample.2$year==t] <- x[,5] g.sample.2$fam.child[g.sample.2$year==t] <- x[,6] rm(x) # predict drug purchases (ATC) g.sample.2$atc.J01[g.sample.2$year==t] <- fe.lp.bn.predict.1var(fit.bsf.atc.J01$coef,fixefs.atc$atc.J01,g.sample.2[g.sample.2$year==t,]) g.sample.2$atc.N02A[g.sample.2$year==t] <- fe.lp.bn.predict.1var(fit.bsf.atc.N02A$coef,fixefs.atc$atc.N02A,g.sample.2[g.sample.2$year==t,]) g.sample.2$atc.N02B[g.sample.2$year==t] <- fe.lp.bn.predict.1var(fit.bsf.atc.N02B$coef,fixefs.atc$atc.N02B,g.sample.2[g.sample.2$year==t,]) g.sample.2$atc.N05[g.sample.2$year==t] <- fe.lp.bn.predict.1var(fit.bsf.atc.N05$coef,fixefs.atc$atc.N05,g.sample.2[g.sample.2$year==t,]) g.sample.2$atc.N06[g.sample.2$year==t] <- fe.lp.bn.predict.1var(fit.bsf.atc.N06$coef,fixefs.atc$atc.N06,g.sample.2[g.sample.2$year==t,]) g.sample.2$atc.G03[g.sample.2$year==t] <- fe.lp.bn.predict.1var(fit.bsf.atc.G03$coef,fixefs.atc$atc.G03,g.sample.2[g.sample.2$year==t,]) g.sample.2$atc.R06[g.sample.2$year==t] <- fe.lp.bn.predict.1var(fit.bsf.atc.R06$coef,fixefs.atc$atc.R06,g.sample.2[g.sample.2$year==t,]) g.sample.2$atc.R03[g.sample.2$year==t] <- fe.lp.bn.predict.1var(fit.bsf.atc.R03$coef,fixefs.atc$atc.R03,g.sample.2[g.sample.2$year==t,]) g.sample.2$atc.C07[g.sample.2$year==t] <- fe.lp.bn.predict.1var(fit.bsf.atc.C07$coef,fixefs.atc$atc.C07,g.sample.2[g.sample.2$year==t,]) g.sample.2$atc.C09[g.sample.2$year==t] <- fe.lp.bn.predict.1var(fit.bsf.atc.C09$coef,fixefs.atc$atc.C09,g.sample.2[g.sample.2$year==t,]) g.sample.2$atc.P01[g.sample.2$year==t] <- fe.lp.bn.predict.1var(fit.bsf.atc.P01$coef,fixefs.atc$atc.P01,g.sample.2[g.sample.2$year==t,]) ## predict income g.sample.2$inc.tax.if[g.sample.2$year==t] <- fe.linpredvar(fit.bsf.inc.tax.if$coef,fixefs.inc.tax$inc.tax.if,fit.bsf.inc.tax.if.SD,g.sample.2[g.sample.2$year==t,]) g.sample.2$inc.hh.if.cu[g.sample.2$year==t] <- fe.linpredvar(fit.bsf.inc.hh.if.cu$coef,fixefs.inc.hh$inc.hh.if.cu,fit.bsf.inc.hh.if.cu.SD,g.sample.2[g.sample.2$year==t,]) # predict AD prescribing g.sample.2$AD[g.sample.2$year==t] <- fe.lp.bn.predict.1var(fit.bsf.AD$coef,fixefs.atc$AD,g.sample.2[g.sample.2$year==t,]) # end loop } # save covariate information (including AD) # in the natural course and the intervention scenario # by fut and monte carlo iteration for(t in 1:19) { g.sample.fut <- g.sample[g.sample$fut==t,] monte.mat.nc[1,t,m] <- mean(g.sample.fut$atv.emp,na.rm=T) monte.mat.nc[2,t,m] <- mean(g.sample.fut$atv.unemp,na.rm=T) monte.mat.nc[3,t,m] <- mean(g.sample.fut$atv.stud,na.rm=T) monte.mat.nc[4,t,m] <- mean(g.sample.fut$atv.ret,na.rm=T) monte.mat.nc[5,t,m] <- mean(g.sample.fut$fam.single,na.rm=T) monte.mat.nc[6,t,m] <- mean(g.sample.fut$fam.coh.nc,na.rm=T) monte.mat.nc[7,t,m] <- mean(g.sample.fut$fam.coh.ch,na.rm=T) monte.mat.nc[8,t,m] <- mean(g.sample.fut$fam.mar.nc,na.rm=T) monte.mat.nc[9,t,m] <- mean(g.sample.fut$fam.mar.ch,na.rm=T) monte.mat.nc[10,t,m] <- mean(g.sample.fut$fam.child,na.rm=T) monte.mat.nc[11,t,m] <- mean(g.sample.fut$atc.J01,na.rm=T) monte.mat.nc[12,t,m] <- mean(g.sample.fut$atc.N02A,na.rm=T) monte.mat.nc[13,t,m] <- mean(g.sample.fut$atc.N02B,na.rm=T) monte.mat.nc[14,t,m] <- mean(g.sample.fut$atc.N05,na.rm=T) monte.mat.nc[15,t,m] <- mean(g.sample.fut$atc.N06,na.rm=T) monte.mat.nc[16,t,m] <- mean(g.sample.fut$atc.G03,na.rm=T) monte.mat.nc[17,t,m] <- mean(g.sample.fut$atc.R06,na.rm=T) monte.mat.nc[18,t,m] <- mean(g.sample.fut$atc.R03,na.rm=T) monte.mat.nc[19,t,m] <- mean(g.sample.fut$atc.C07,na.rm=T) monte.mat.nc[20,t,m] <- mean(g.sample.fut$atc.C09,na.rm=T) monte.mat.nc[21,t,m] <- mean(g.sample.fut$atc.P01,na.rm=T) monte.mat.nc[22,t,m] <- mean(g.sample.fut$inc.tax.if,na.rm=T) monte.mat.nc[23,t,m] <- mean(g.sample.fut$inc.hh.if.cu,na.rm=T) monte.mat.nc[24,t,m] <- mean(g.sample.fut$AD,na.rm=T) # subgroup analysis for NC monte.mat.nc.sub[1,t,m] <- mean(g.sample.fut$AD[which(g.sample.fut$idnr %in% id.male.second)],na.rm=T) monte.mat.nc.sub[2,t,m] <- mean(g.sample.fut$AD[which(g.sample.fut$idnr %in% id.male.tert)],na.rm=T) monte.mat.nc.sub[3,t,m] <- mean(g.sample.fut$AD[which(g.sample.fut$idnr %in% id.female.second)],na.rm=T) monte.mat.nc.sub[4,t,m] <- mean(g.sample.fut$AD[which(g.sample.fut$idnr %in% id.female.tert)],na.rm=T) monte.mat.nc.sub[5,t,m] <- mean(g.sample.fut$atv.unemp[which(g.sample.fut$idnr %in% id.male.second)],na.rm=T) monte.mat.nc.sub[6,t,m] <- mean(g.sample.fut$atv.unemp[which(g.sample.fut$idnr %in% id.male.tert)],na.rm=T) monte.mat.nc.sub[7,t,m] <- mean(g.sample.fut$atv.unemp[which(g.sample.fut$idnr %in% id.female.second)],na.rm=T) monte.mat.nc.sub[8,t,m] <- mean(g.sample.fut$atv.unemp[which(g.sample.fut$idnr %in% id.female.tert)],na.rm=T) g.sample.2.fut <- g.sample.2[g.sample.2$fut==t,] monte.mat.int[1,t,m] <- mean(g.sample.2.fut$atv.emp,na.rm=T) monte.mat.int[2,t,m] <- mean(g.sample.2.fut$atv.unemp,na.rm=T) monte.mat.int[3,t,m] <- mean(g.sample.2.fut$atv.stud,na.rm=T) monte.mat.int[4,t,m] <- mean(g.sample.2.fut$atv.ret,na.rm=T) monte.mat.int[5,t,m] <- mean(g.sample.2.fut$fam.single,na.rm=T) monte.mat.int[6,t,m] <- mean(g.sample.2.fut$fam.coh.nc,na.rm=T) monte.mat.int[7,t,m] <- mean(g.sample.2.fut$fam.coh.ch,na.rm=T) monte.mat.int[8,t,m] <- mean(g.sample.2.fut$fam.mar.nc,na.rm=T) monte.mat.int[9,t,m] <- mean(g.sample.2.fut$fam.mar.ch,na.rm=T) monte.mat.int[10,t,m] <- mean(g.sample.2.fut$fam.child,na.rm=T) monte.mat.int[11,t,m] <- mean(g.sample.2.fut$atc.J01,na.rm=T) monte.mat.int[12,t,m] <- mean(g.sample.2.fut$atc.N02A,na.rm=T) monte.mat.int[13,t,m] <- mean(g.sample.2.fut$atc.N02B,na.rm=T) monte.mat.int[14,t,m] <- mean(g.sample.2.fut$atc.N05,na.rm=T) monte.mat.int[15,t,m] <- mean(g.sample.2.fut$atc.N06,na.rm=T) monte.mat.int[16,t,m] <- mean(g.sample.2.fut$atc.G03,na.rm=T) monte.mat.int[17,t,m] <- mean(g.sample.2.fut$atc.R06,na.rm=T) monte.mat.int[18,t,m] <- mean(g.sample.2.fut$atc.R03,na.rm=T) monte.mat.int[19,t,m] <- mean(g.sample.2.fut$atc.C07,na.rm=T) monte.mat.int[20,t,m] <- mean(g.sample.2.fut$atc.C09,na.rm=T) monte.mat.int[21,t,m] <- mean(g.sample.2.fut$atc.P01,na.rm=T) monte.mat.int[22,t,m] <- mean(g.sample.2.fut$inc.tax.if,na.rm=T) monte.mat.int[23,t,m] <- mean(g.sample.2.fut$inc.hh.if.cu,na.rm=T) monte.mat.int[24,t,m] <- mean(g.sample.2.fut$AD,na.rm=T) # subgroup analysis for Int monte.mat.int.sub[1,t,m] <- mean(g.sample.2.fut$AD[which(g.sample.2.fut$idnr %in% id.male.second)],na.rm=T) monte.mat.int.sub[2,t,m] <- mean(g.sample.2.fut$AD[which(g.sample.2.fut$idnr %in% id.male.tert)],na.rm=T) monte.mat.int.sub[3,t,m] <- mean(g.sample.2.fut$AD[which(g.sample.2.fut$idnr %in% id.female.second)],na.rm=T) monte.mat.int.sub[4,t,m] <- mean(g.sample.2.fut$AD[which(g.sample.2.fut$idnr %in% id.female.tert)],na.rm=T) # we don't need to save unemp info, since we know it is 0 } } ## now we put the findings in the output array # averaging over the monte carlo iterations # but keeping sampling variability for(i in 1:24) { # natural course output.mat.nc[i,,bs] <- apply(monte.mat.nc[i,,],MARGIN=1,FUN=mean,na.rm=T) # intervention output.mat.int[i,,bs] <- apply(monte.mat.int[i,,],MARGIN=1,FUN=mean,na.rm=T) } # and the subgroup analyses for(i in 1:8) { output.mat.nc.sub[i,,bs] <- apply(monte.mat.nc.sub[i,,],MARGIN=1,FUN=mean,na.rm=T) } for(i in 1:4) { output.mat.int.sub[i,,bs] <- apply(monte.mat.int.sub[i,,],MARGIN=1,FUN=mean,na.rm=T) } print(bs) } t2 <- Sys.time() t2 - t1 # natural course output.mat.nc # intervention output.mat.int # natural course for subsets output.mat.nc.sub # intervention for subsets output.mat.int.sub