###### FUNCTION FOR GENERATING CLUSTERED COSTS AND QALYS ### sim.data <- function(clusters, # No. of clusters cluster.size=list(mean, cv), # mean cluster size and coefficient of variation cost =list(mean, sd), # cluster-level mean cost and SD qaly =list(mean, sd), # cluster-level mean qaly and SD ICC =list(cost, qaly), # Intra-cluster correlation coefficient for cost and qaly rho =list(clust, indiv), # Corr(cost, qaly) at cluster and individual-level treat =list(cost, qaly), # treatment effect for costs and qalys ivar =list(mean, sd), # individual-level covariate mean and SD cvar =list(mean, sd), # cluster-level covariate mean and SD covariate =list(ivar, cvar), # indicating whether cvar or ivar are included as predictors alpha.cost =list(ivar, cvar), # Strength of association between cost and covariates alpha.qaly =list(ivar, cvar), # Strength of association between qaly and covariates auxiliary =list(ivar,cvar,treat), # Indicators for missingness predictors: ivar, cvar or treat p.miss =list(cost, qaly), # % of missing costs by treatment arm i.eta =list(cost, qaly), # Strength of association between ivar and p.miss by enpoint c.eta =list(cost, qaly), # Strength of association between cvar and p.miss by enpoint t.eta =list(cost, qaly)) # Strength of association between treat and p.miss by enpoint { ### No. of clusters must be an even number ### if(clusters%%2!=0) stop("number of clusters must be an even number \n" ### Cluster size distribution (unbalanced) ### cluster.size.sd <-cluster.size$mean*cluster.size$cv cluster.size.mean<-cluster.size$mean cluster.size.shape<- cluster.size.mean^2/cluster.size.sd^2 cluster.size.scale <- cluster.size.sd^2/cluster.size.mean clust.size <- round(rgamma( n=clusters, shape=cluster.size.shape, scale=cluster.size.scale), digits = 0) clust.size[clust.size==0] <- 1 clust.id <- rep(1:clusters, times=clust.size) # cluster ID indiv.id <-array(NA,length(clust.id)) for (i in 1:clusters){ indiv.id[clust.id==i] <- rep(1:clust.size[i]) } # within-cluster ID id <- 1:length(clust.id) # individual ID treat.id <- c(rep(0, sum(clust.size[1:(clusters/2)])), # treatment ID rep(1, sum(clust.size[(clusters/2+1):clusters])) ) clust.size.list<- rep(clust.size, times=clust.size) ### Individual and cluster IDs by treatment group ### clust.size0<-clust.size[1:(clusters/2)] clust.size1<-clust.size[(clusters/2+1):clusters] clust.id0<-clust.id[1:sum(clust.size[1:(clusters/2)])] clust.id1<-clust.id[(sum(clust.size[1:(clusters/2)])+1):sum(clust.size)] id0<-id[1:sum(clust.size[1:(clusters/2)])] id1<-id[(sum(clust.size[1:(clusters/2)])+1):sum(clust.size)] ### CLUSTER-LEVEL COVARIATE ### c.cvar <-rnorm( n=clusters, mean=cvar$mean, sd=cvar$sd) c.var <- rep(c.cvar, times=clust.size) # cluster-level covariate c.cvar0<-c.cvar[1:(clusters/2)] c.cvar1<-c.cvar[(clusters/2+1):clusters] ### INDIVIDUAL-LEVEL COVARIATE ### i.var <- rnorm( n=clength(id), mean=ivar$mean, sd=ivar$sd) i.var0<-i.var[1:(sum(clust.size0))] i.var1<-i.var[(sum(clust.size0)+1):sum(clust.size)] ### NORMAL DISTRIBUTED COSTS – control arm ### ### Cluster-level mean costs ### c.var.cost<-(ICC$cost/(1-ICC$cost))*(cost$sd^2) # cluster-level cost variance mean.cost0 <- rep(cost$mean0, each=clusters/2) ### Cluster-level covariate effect ### c.mean.cost0<-mean.cost0 if(covariate$cvar){ # cluster-level predictor c.var.cost<-(ICC$cost/(1-ICC$cost))*(cost$sd^2)*(1-alpha.cost$cvar^2) c.beta.cost<- (sqrt(c.var.cost)/cvar$sd)*sqrt(alpha.cost$cvar^2/(1-alpha.cost$cvar^2)) for(j in 1:(clusters/2)){ c.mean.cost0[j] <- mean.cost0[j] + c.beta.cost*c.cvar0[j] } } c.cost0 <- rnorm( n=clusters/2, mean=c.mean.cost0, sd=sqrt(c.var.cost)) muc0<-c.cost0-cost$mean0 # cluster-level costs (control) ### Individual-level costs ### i.var.cost<-(cost$sd^2) if(covariate$ivar){ i.var.cost<-(cost$sd^2)*(1-alpha.cost$ivar^2) } i.mean.cost0<- rep(NA, sum(clust.size0)) for(j in 1:(clusters/2)){ i.mean.cost0[clust.id0==j] <- rnorm( n=clust.size0[j], mean=c.cost0[j], sd=sqrt(i.var.cost)) } i.cost0<-i.mean.cost0 if(covariate$ivar){ # Individual-level predictor i.beta.cost<-(sqrt(i.var.cost)/ivar$sd)*sqrt(alpha.cost$ivar^2/(1-alpha.cost$ivar^2)) for(j in 1:length(id0)){ i.cost0[j] <- i.mean.cost0[j] + i.beta.cost*i.var0[j] }} # individual-level costs (control) ### NORMAL DISTRIBUTED COSTS – treatment arm ### ### Cluster-level mean costs ### c.var.cost<-(ICC$cost/(1-ICC$cost))*(cost$sd^2) # cluster-level cost variance mean.cost1 <- rep(cost$mean1, each=clusters/2) c.mean.cost1<-mean.cost1 if(covariate$cvar){ # cluster-level predictor c.var.cost<-(ICC$cost/(1-ICC$cost))*(cost$sd^2)*(1-alpha.cost$cvar^2) c.beta.cost<- (sqrt(c.var.cost)/cvar$sd)*sqrt(alpha.cost$cvar^2/(1-alpha.cost$cvar^2)) for(j in 1:(clusters/2)){ c.mean.cost1[j] <- mean.cost1[j] + c.beta.cost*c.cvar1[j] }} c.cost1 <- rnorm( n=clusters/2, mean=c.mean.cost1, sd=sqrt(c.var.cost)) muc1<-c.cost1-cost$mean1 # cluster-level costs (treatment) muc <-c(muc0,muc1) mu.c<-rep(muc, times=clust.size) # cluster-level costs (all) ### Individual-level costs #### i.var.cost<-(cost$sd^2) if(covariate$ivar){ i.var.cost<-(cost$sd^2)*(1-alpha.cost$ivar^2) } i.mean.cost1<- rep(NA, sum(clust.size1)) t<-0 for(j in (clusters/2+1):clusters){ t<-t+1 i.mean.cost1[clust.id1==j] <- rnorm( n=clust.size1[t], mean=c.cost1[t], sd=sqrt(i.var.cost)) } i.cost1<-i.mean.cost1 if(covariate$ivar){ # individual-level predictor i.beta.cost<-(sqrt(i.var.cost)/ivar$sd)*sqrt(alpha.cost$ivar^2/(1-alpha.cost$ivar^2)) for(j in 1:length(id1)){ i.cost1[j] <- i.mean.cost1[j] + i.beta.cost*i.var1[j] }} # individual-level costs (treatment) i.cost<-c(i.cost0,i.cost1) # individual-level costs (all) ### NORMAL DISTRIBUTED QALYs – control arm ### c.var.qaly<-(ICC$qaly/(1-ICC$qaly))*(qaly$sd^2) # cluster-level qaly variance if(is.logical(all.equal(c.var.qaly,0)) | is.logical(all.equal(c.var.cost,0)) ) { c.gamma <- 0 } else { c.gamma <-rho$clust*(sqrt(c.var.qaly)/sqrt(c.var.cost)) } c.var.qaly.adj <-c.var.qaly*(1-(rho$clust^2)) ### Cluster-level mean QALYs ### mean.qaly0 <- rep(qaly$mean0, each=clusters/2) c.mean.qaly0<-mean.qaly0 if(covariate$cvar){ # cluster-level predictor c.var.qaly.adj <-c.var.qaly*(1-rho$clust^2-alpha.qaly$cvar^2) c.beta.qaly <-(sqrt(c.var.qaly.adj)/cvar$sd)*sqrt(alpha.qaly$cvar^2/(1-alpha.qaly$cvar^2)) for(j in 1:(clusters/2)){ c.mean.qaly0[j] <- mean.qaly0[j] + c.beta.qaly*c.cvar0[j] }} c.qaly0 <- rnorm( n=clusters/2, mean=c.mean.qaly0 + c.gamma*(c.cost0-mean.cost0), sd=sqrt(c.var.qaly.adj)) mue0<-c.qaly0-qaly$mean0 # cluster-level qalys (control) ### Individual-level mean QALYs ### i.var.qaly <-(qaly$sd^2)*(1-rho$indiv^2) # SD ‘correction’ if(covariate$ivar){ i.var.qaly <-(qaly$sd^2)*(1-rho$indiv^2-alpha.qaly$ivar^2) } i.gamma <-rho$indiv*(sqrt(i.var.qaly)/cost$sd) i.mean.qaly0<- rep(NA, sum(clust.size0)) for(j in 1:(clusters/2)){ i.mean.qaly0[clust.id0==j] <- rnorm( n=clust.size0[j], mean=c.qaly0[j]+i.gamma*(i.cost0[clust.id0==j]-c.cost0[j]), sd=sqrt(i.var.qaly)) } i.qaly0<-i.mean.qaly0 if(covariate$ivar){ # individual-level predictor i.beta.qaly<-(sqrt(i.var.qaly)/ivar$sd)*sqrt(alpha.qaly$ivar^2/(1-alpha.qaly$ivar^2)) for(j in 1:length(id0)){ i.qaly0[j] <- i.mean.qaly0[j] + i.beta.qaly*i.var0[j] }} # individual-level qalys (control) ### NORMAL DISTRIBUTED QALYs – treatment arm ### c.var.qaly<-(ICC$qaly/(1-ICC$qaly))*(qaly$sd^2) # cluster-level qaly variance if(is.logical(all.equal(c.var.qaly,0)) | is.logical(all.equal(c.var.cost,0)) ){ c.gamma <- 0 } else { c.gamma <-rho$clust*(sqrt(c.var.qaly)/sqrt(c.var.cost)) } c.var.qaly.adj <-c.var.qaly*(1-(rho$clust^2)) ### Cluster-level mean QALYs #### mean.qaly1 <- rep(qaly$mean1, each=clusters/2) c.mean.qaly1<-mean.qaly1 if(covariate$cvar){ # cluster-level predictor c.var.qaly.adj <-c.var.qaly*(1-rho$clust^2-alpha.qaly$cvar^2) c.beta.qaly <-(sqrt(c.var.qaly.adj)/cvar$sd)*sqrt(alpha.qaly$cvar^2/(1-alpha.qaly$cvar^2)) for(j in 1:(clusters/2)){ c.mean.qaly1[j] <- mean.qaly1[j] + c.beta.qaly*c.cvar1[j] }} c.qaly1 <- rnorm( n=clusters/2, mean=c.mean.qaly1 + c.gamma*(c.cost1-mean.cost1), sd=sqrt(c.var.qaly.adj)) mue1<-c.qaly1-qaly$mean1 # cluster-level qalys (treatment) mue <-c(mue0,mue1) mu.e<-rep(mue, times=clust.size) # cluster-level qalys (all) ### Individual-level mean QALYs ### i.var.qaly <-(qaly$sd^2)*(1-rho$indiv^2) if(covariate$ivar){ i.var.qaly <-(qaly$sd^2)*(1-rho$indiv^2-alpha.qaly$ivar^2) } i.gamma <-rho$indiv*(sqrt(i.var.qaly)/cost$sd) i.mean.qaly1<- rep(NA, sum(clust.size1 t<-0 for(j in (clusters/2+1):clusters){ t<-t+1 i.mean.qaly1[clust.id1==j] <- rnorm( n=clust.size1[t], mean=c.qaly1[t]+i.gamma*(i.cost1[clust.id1==j]-c.cost1[t]), sd=sqrt(i.var.qaly)) } i.qaly1<-i.mean.qaly1 if(covariate$ivar){ # individual-level predictor i.beta.qaly<-(sqrt(i.var.qaly)/ivar$sd)*sqrt(alpha.qaly$ivar^2/(1-alpha.qaly$ivar^2)) for(j in 1:length(id1)){ i.qaly1[j] <- i.mean.qaly1[j] + i.beta.qaly*i.var1[j] }} # individual-level qaly (treatment) i.qaly<-c(i.qaly0,i.qaly1) # individual-level qaly (all) ### Create data frame ### cons <-rep(1, length=length(id)) data <- data.frame(i.cost, i.qaly, i.var, c.var, mu.c, mu.e, clust.size.list, clust.id, indiv.id, treat.id, id, cons) names(data) <-c("cost", "qaly", "ivar", "cvar", "mu.c", "mu.e", "clust.size", "cluster", "indiv", "treat", "id", "cons") ### GENERATING MISSING COSTS AND QALYS #### ## MCAR ### theta.cost<-log(p.miss$cost/(1-p.miss$cost)) theta.qaly<-log(p.miss$qaly/(1-p.miss$qaly)) p.c <- exp(theta.cost*data$cons)/(1+exp(theta.cost*data$cons)) p.q <- exp(theta.qaly*data$cons)/(1+exp(theta.qaly*data$cons)) ### MAR ### if(auxiliary$ivar){ # only one individual-level auxiliary variable theta.cost<-log(p.miss$cost/(1-p.miss$cost))-i.eta$cost*mean(data$ivar) theta.qaly<-log(p.miss$qaly/(1-p.miss$qaly))-i.eta$qaly*mean(data$ivar) p.c <- exp(theta.cost+i.eta$cost*data$ivar)/(1+exp(theta.cost+i.eta$cost*data$ivar)) p.q <- exp(theta.qaly+i.eta$qaly*data$ivar)/(1+exp(theta.qaly+i.eta$qaly*data$ivar)) } if(auxiliary$cvar){ # one individual and one cluster-level auxiliary variable theta.cost<-log(p.miss$cost/(1-p.miss$cost))-i.eta$cost*mean(data$ivar)- c.eta$cost*mean(data$cvar) theta.qaly<-log(p.miss$qaly/(1-p.miss$qaly))-i.eta$qaly*mean(data$ivar)- c.eta$qaly*mean(data$cvar) p.c <- exp(theta.cost+i.eta$cost*data$ivar+c.eta$cost*data$cvar)/ (1+exp(theta.cost+i.eta$cost*data$ivar+c.eta$cost*data$cvar)) p.q <- exp(theta.qaly+i.eta$qaly*data$ivar+c.eta$cost*data$cvar)/ (1+exp(theta.qaly+i.eta$qaly*data$ivar+c.eta$cost*data$cvar)) } if(auxiliary$treat){ # add a differential prob of missingness per treatment arm theta.cost<-log(p.miss$cost/(1-p.miss$cost))-i.eta$cost*mean(data$ivar)- c.eta$cost*mean(data$cvar)+t.eta$cost*mean(data$treat) theta.qaly<-log(p.miss$qaly/(1-p.miss$qaly))-i.eta$qaly*mean(data$ivar)- c.eta$qaly*mean(data$cvar)+t.eta$qaly*mean(data$treat) theta.cost1<-log(p.miss$cost/(1-p.miss$cost))-i.eta$cost*mean(data$ivar)- c.eta$cost*mean(data$cvar)-t.eta$cost*mean(data$treat) theta.qaly1<-log(p.miss$qaly/(1-p.miss$qaly))-i.eta$qaly*mean(data$ivar)- c.eta$qaly*mean(data$cvar)-t.eta$qaly*mean(data$treat) p.c0 <-exp(theta.cost+i.eta$cost*data$ivar[data$treat==0]+c.eta$cost* data$cvar[data$treat==0]-t.eta$cost*1)/(1+exp(theta.cost+i.eta$cost* data$ivar[data$treat==0]+c.eta$cost*data$cvar[data$treat==0]-t.eta$cost*1)) p.q0 <- exp(theta.qaly+i.eta$qaly*data$ivar[data$treat==0]+c.eta$qaly* data$cvar[data$treat==0]-t.eta$qaly*1)/(1+exp(theta.qaly+i.eta$qaly* data$ivar[data$treat==0]+c.eta$qaly*data$cvar[data$treat==0]-t.eta$qaly*1)) p.c1 <-exp(theta.cost1+i.eta$cost*data$ivar[data$treat==1]+c.eta$cost* data$cvar[data$treat==1]+t.eta$cost*data$treat[data$treat==1])/ (1+exp(theta.cost1+i.eta$cost*data$ivar[data$treat==1]+c.eta$cost* data$cvar[data$treat==1]+t.eta$cost*data$treat[data$treat==1])) p.q1 <-exp(theta.qaly1+i.eta$qaly*data$ivar[data$treat==1]+c.eta$qaly* data$cvar[data$treat==1]+t.eta$qaly*data$treat[data$treat==1])/ (1+exp(theta.qaly1+i.eta$qaly*data$ivar[data$treat==1]+c.eta$qaly* data$cvar[data$treat==1]+t.eta$qaly*data$treat[data$treat==1])) p.c<-c(p.c0,p.c1) p.q<-c(p.q0,p.q1) } data$miss.c<-rbinom(length(p.c),1,prob=p.c) data$miss.q<-rbinom(length(p.q),1,prob=p.q) data$cost_miss<-ifelse(data$miss.c==0,data$cost,NA) data$qaly_miss<-ifelse(data$miss.q==0,data$qaly,NA) return(data) }