######################################### ### Censored Quantile Regression ### ### (Peng and Huang, JASA 2008) ### ### for academic research only ### ### ### ### test dataset and documentation ### ### are provided by Shuang Ji ### ######################################### ########################################################################### ### function for obtaining variance and confidence interval from resampling ### var.out=average variance ### conf.ub=97.5 percentile of beta from resampling ### conf.lb=2.5 percentile of beta from resampling ########################################################################### sumry.var.conf<-function(all.rsmp.beta) { var.out<-array(rep(0, L.tau.seq*(cvt.length+1)), c(L.tau.seq, cvt.length+1)) conf.ub<-array(rep(0, L.tau.seq*(cvt.length+1)), c(L.tau.seq, cvt.length+1)) conf.lb<-array(rep(0, L.tau.seq*(cvt.length+1)), c(L.tau.seq, cvt.length+1)) sel.tau.indx<-(sum(tau.seq<=tau.L)+1): (sum(tau.seq<=tau.U)) for(k in 1:(cvt.length+1)) { var.out[,k]<-apply(all.rsmp.beta[, seq(k, (cvt.length+1)*B.num, (cvt.length+1))], 1, FUN=var) } for(k in 1:(cvt.length+1)) { for(tau.k in 1:L.tau.seq) { tmp.srt<-sort(all.rsmp.beta[tau.k, seq(k, (cvt.length+1)*B.num, (cvt.length+1))]) conf.ub[tau.k, k]<-tmp.srt[U.indx] conf.lb[tau.k, k]<-tmp.srt[L.indx] } } cbind(var.out, conf.ub, conf.lb) } ################################################################################### ### function for second stage inference ### est.cnst=average effect from point estimation ### mean.cnst=average effect from resampling ### var.cnst=variance of average effect from resampling ### sig.test=test statistic for overall signifiance - null dist'n is chisq ### cnst.test.w1=test statistic for constancy using weight 1 - null dist'n is chisq ### cnst.test.w2=test statistic for constancy using weight 2 - null dist'n is chisq ### p.sig.test=p-value for the overall significance test ### p.cnst.test.w1=p-value for the constancy test using weight 1 ### p.cnst.test.w2=p-value for the constancy test using weight 2 ################################################################################### sumry.cnst.gnfit<-function(all.rsmp.beta) { var.out<-array(rep(0, L.tau.seq*(cvt.length+1)), c(L.tau.seq, cvt.length+1)) sel.tau.indx<-(sum(tau.seq<=tau.L)+1):(sum(tau.seq<=tau.U)) chop.tau.seq<-tau.seq[sel.tau.indx] sel.rsmp.beta<-all.rsmp.beta[sel.tau.indx,] est.const<-apply(est.beta.seq[sel.tau.indx,]*tau.step, 2, FUN=sum)/(tau.U-tau.L) mean.cnst<-rep(0, cvt.length+1) var.cnst<-rep(0, cvt.length+1) ub.cnst<-rep(0, cvt.length+1) lb.cnst<-rep(0, cvt.length+1) gnt.w1.lb<-rep(0, cvt.length+1) gnt.w1.ub<-rep(0, cvt.length+1) gnt.w2.lb<-rep(0, cvt.length+1) gnt.w2.ub<-rep(0, cvt.length+1) gnt.test.w1<-rep(0, cvt.length+1) gnt.test.w2<-rep(0, cvt.length+1) gnt.test.w1.var <- rep(0,cvt.length+1) gnt.test.w2.var <- rep(0,cvt.length+1) sig.test <- rep(0,cvt.length+1) cnst.test.w1 <- rep(0,cvt.length+1) cnst.test.w2 <- rep(0,cvt.length+1) p.sig.test <- rep(0,cvt.length+1) p.cnst.test.w1 <- rep(0,cvt.length+1) p.cnst.test.w2 <- rep(0,cvt.length+1) for(k in 1:(cvt.length+1)) { var.out[,k]<-apply((all.rsmp.beta[, seq(k, (cvt.length+1)*B.num, (cvt.length+1))]-est.beta.seq[,k])^2, 1, FUN=mean) cnst.rsmp<-apply(sel.rsmp.beta[, seq(k, (cvt.length+1)*B.num, (cvt.length+1))]*tau.step, 2, FUN=sum)/(tau.U-tau.L) gn.indx<-(chop.tau.seq>=(tau.L+tau.U)/2) diff.rsmp<-t(t(sel.rsmp.beta[gn.indx, seq(k, (cvt.length+1)*B.num, (cvt.length+1))])-as.vector(cnst.rsmp)) gnt.rsmp<-apply(diff.rsmp*tau.step, 2, FUN=sum) gnt.test.w1[k]<-sum(((est.beta.seq[sel.tau.indx,])[gn.indx, k]-est.const[k])*tau.step)*sqrt(num) gnt.w1.lb[k]<-sort(gnt.rsmp)[L.indx]*sqrt(num) gnt.w1.ub[k]<-sort(gnt.rsmp)[U.indx]*sqrt(num) gnt.test.w1.var[k] <- var(gnt.rsmp*sqrt(num)) gn.indx<-(chop.tau.seq<(tau.L+tau.U)/2) diff.rsmp<-t(t(sel.rsmp.beta[gn.indx, seq(k, (cvt.length+1)*B.num, (cvt.length+1))])-as.vector(cnst.rsmp)) gnt.rsmp<-apply(diff.rsmp*tau.step, 2, FUN=sum) gnt.w2.lb[k]<-sort(gnt.rsmp)[L.indx]*sqrt(num) gnt.w2.ub[k]<-sort(gnt.rsmp)[U.indx]*sqrt(num) gnt.test.w2[k]<-sum(((est.beta.seq[sel.tau.indx,])[gn.indx, k]-est.const[k])*tau.step)*sqrt(num) gnt.test.w2.var[k] <- var(gnt.rsmp*sqrt(num)) mean.cnst[k]<-mean(cnst.rsmp) var.cnst[k]<-var(cnst.rsmp) ub.cnst[k]<-(sort(cnst.rsmp))[U.indx] lb.cnst[k]<-(sort(cnst.rsmp))[L.indx] } sig.test <- est.const^2/var.cnst cnst.test.w1 <- gnt.test.w1^2/gnt.test.w1.var cnst.test.w2 <- gnt.test.w2^2/gnt.test.w2.var p.sig.test <- 1-pchisq(sig.test,1) p.cnst.test.w1 <- 1-pchisq(cnst.test.w1,1) p.cnst.test.w2 <- 1-pchisq(cnst.test.w2,1) cbind(est.const, mean.cnst, var.cnst, sig.test, cnst.test.w1, cnst.test.w2, p.sig.test, p.cnst.test.w1, p.cnst.test.w2, gnt.test.w1,gnt.test.w1.var,gnt.w1.lb,gnt.w1.ub,gnt.test.w2,gnt.test.w2.var,gnt.w2.lb,gnt.w2.ub) } ##################################################### ### DATA FORMAT ### first column: observed time X ### second column: censoring status (0 for censored) ### third column: constant 1 ### rest of columns: covariates ### Note: The program may not handle missing data ##################################################### ### save data to object "da" according the format stated above ### assign covariate names to "char.cvt" da <- as.matrix(read.csv("testda1.csv", header=T)) da <- da[apply(is.na(da),1,sum)==0,] num <- dim(da)[1] cvt.length<-dim(da)[2]-3 ### number of covariates char.cvt=c("Intercept","Z1","Z2","Z3","Z4") B.num<-250 ### number of resampling U.indx<-round(B.num*0.975) L.indx<-round(B.num*0.025) tau.step<-0.01 ### size of grid for estimation tau.seq<-seq(0, 0.8, tau.step)[-1] ### tau range of interest L.tau.seq<-length(tau.seq) tau.L<-0.1 ### lower bound of tau range for the trimmed mean effect tau.U<-0.7 ### upper bound of tau range for the trimmed mean effect max.lmt<-10000 dHt<-(-log(1-tau.seq))-(-log(1-c(0, tau.seq[1:(L.tau.seq-1)]))) ### point estimation starts library(quantreg) pseudo.resp<-c(da[,1]*da[,2], max.lmt, max.lmt) da.cvt<-da[,3:(3+cvt.length)] n.cvt.1<-(t(-da.cvt))%*%da[,2] est.beta.seq<-rbind(c(-Inf, rep(0, cvt.length))) L.bsq<-1 ind.conv<-1 while(L.bsq<=L.tau.seq&ind.conv==1) { est.beta<-rep(0, cvt.length+1) tau<-tau.seq[L.bsq] tmp.cmp<-(da[,1]>=da.cvt%*%t(est.beta.seq))+0 n.cvt.2<-2*(t(da.cvt))%*%(tmp.cmp%*%dHt[1:L.bsq]) pseudo.cvt<-rbind(da.cvt*da[,2], as.vector(n.cvt.1), as.vector(n.cvt.2)) est.beta<-rq.fit.br(pseudo.cvt, pseudo.resp, ci=F)$coef ind.conv<-as.numeric(abs(max.lmt-t(est.beta)%*%n.cvt.1)>1)*as.numeric(abs(max.lmt-t(est.beta)%*%n.cvt.2)>1) if(ind.conv==1) { L.bsq<-L.bsq+1 est.beta.seq<-rbind(est.beta.seq, est.beta) } } if(ind.conv==0) est.beta.seq<-rbind(est.beta.seq, t(array(rep(est.beta.seq[L.bsq,], L.tau.seq-L.bsq+1), c(cvt.length+1, L.tau.seq-L.bsq+1)))) est.beta.seq<-est.beta.seq[-1,] ### point estimaiton ends ### resampling starts all.rsmp.beta<-NULL rsmp.sim<-0 while(rsmp.sim=da[,3:(3+cvt.length)]%*%t(rsmp.beta.seq))+0 rsmp.n.cvt.2<-2*(t(rsmp.da.cvt))%*%(tmp.cmp%*%dHt[1:L.rsmp.bsq]) rsmp.pseudo.cvt<-rbind(rsmp.da.cvt*da[,2], as.vector(rsmp.n.cvt.1), as.vector(rsmp.n.cvt.2)) rsmp.beta<-rq.fit.br(rsmp.pseudo.cvt, rsmp.pseudo.resp, ci=F)$coef rsmp.ind.conv<-as.numeric(abs(max.lmt-t(rsmp.beta)%*%rsmp.n.cvt.1)>1)*as.numeric(abs(max.lmt-t(rsmp.beta)%*%rsmp.n.cvt.2)>1) if(rsmp.ind.conv==1) { rsmp.beta.seq<-rbind(rsmp.beta.seq, rsmp.beta) L.rsmp.bsq<-L.rsmp.bsq+1 } } if(rsmp.ind.conv==0) rsmp.beta.seq<-rbind(rsmp.beta.seq, t(array(rep(rsmp.beta.seq[L.rsmp.bsq,], L.tau.seq-L.rsmp.bsq+1), c(cvt.length+1, L.tau.seq-L.rsmp.bsq+1)))) rsmp.beta.seq<-rsmp.beta.seq[-1,] all.rsmp.beta<-cbind(all.rsmp.beta, rsmp.beta.seq) rsmp.sim<-rsmp.sim+1 } ### resampling ends ### extract results for tau=0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8 sel.indx<-c(0.1/tau.step, 0.2/tau.step, 0.3/tau.step, 0.4/tau.step, 0.5/tau.step, 0.6/tau.step, 0.7/tau.step, 0.8/tau.step) sel.est.beta<-est.beta.seq[sel.indx,] sumry.out.1<-sumry.var.conf(all.rsmp.beta) sumry.out.2<-sumry.cnst.gnfit(all.rsmp.beta) sel.var.beta<-sumry.out.1[sel.indx, 1:(cvt.length+1)] sel.confint.ub<-sumry.out.1[sel.indx, (cvt.length+2):(2*cvt.length+2)] sel.confint.lb<-sumry.out.1[sel.indx, (2*cvt.length+3):(3*cvt.length+3)] indx.incl<-(tau.L/tau.step):(tau.U/tau.step) L.indx.incl<-length(indx.incl) cat(c(sel.est.beta, sel.var.beta, sel.confint.ub, sel.confint.lb, sumry.out.2[,1:9]), "\n", append=T, file="testda1.out") ############################################### ### summarize results ######################### ############################################### res<-read.table("testda1.out") out1<-t(matrix(apply(res[, 1:(8*(1+cvt.length))], 2, mean),ncol=1+cvt.length))[, 1:7] # Point estimates out2<-t(matrix(apply(res[, (1+8*(1+cvt.length)):(16*(1+cvt.length))], 2, mean), ncol=1+cvt.length))[, 1:7] #Average Variance output1<-round(cbind(c(out1), sqrt(c(out2))), 3) output1<-cbind(rep(char.cvt,7), c(t(array(rep(seq(0.1, 0.7, 0.1), 1+cvt.length), c(7, 1+cvt.length)))), output1) output1<-data.frame(output1) names(output1)<-c(" ","Tau", "Estimates", "AveSE") out3 <- matrix(apply(res[, (1+32*(1+cvt.length)):(33*(1+cvt.length))], 2, mean)) # Trimmed mean effect out4 <- matrix(apply(res[, (1+34*(1+cvt.length)):(35*(1+cvt.length))], 2, mean)) # Variance of the trimmed mean effect out5 <- matrix(apply(res[, (1+35*(1+cvt.length)):(36*(1+cvt.length))], 2, mean)) # Chi-sq test statistic for overall significance out6 <- matrix(apply(res[, (1+36*(1+cvt.length)):(37*(1+cvt.length))], 2, mean)) # Chi-sq test statistic for constancy - weight 1 out7 <- matrix(apply(res[, (1+37*(1+cvt.length)):(38*(1+cvt.length))], 2, mean)) # Chi-sq test statistic for constancy - weight 2 out8 <- matrix(apply(res[, (1+38*(1+cvt.length)):(39*(1+cvt.length))], 2, mean)) # p-valuce for overall significance test out9 <- matrix(apply(res[, (1+39*(1+cvt.length)):(40*(1+cvt.length))], 2, mean)) # p-valuce for constancy test - weight 1 out10 <- matrix(apply(res[, (1+40*(1+cvt.length)):(41*(1+cvt.length))], 2, mean)) # p-valuce for constancy test - weight 2 output2<-round(cbind(c(out3), c(sqrt(out4)), c(out5), c(out8), c(out6), c(out9), c(out7), c(out10)), 3) output2<-data.frame(output2) names(output2)<-c("Avg.eff", "Se.avg.eff", "Sig.test.stat", "P.sig.test", "Cnst.test.w1", "P.cnst.w1", "Cnst.test.w2", "P.cnst.w2") ### output results ############################ output1 # point estimates and variance estimates output2 # second-stage inferences L.bsq*tau.step # largest tau to which the estimation converges ############################################### ### plot ###################################### ############################################### plot.tau.seq <- seq(0.1, 0.6, tau.step) plot.indx <- plot.tau.seq/tau.step plot.var.beta<-sumry.out.1[plot.indx, 1:(cvt.length+1)] plot.beta.seq <- est.beta.seq[plot.indx,] CI.lb.seq <- plot.beta.seq-1.96*sqrt(plot.var.beta) CI.ub.seq <- plot.beta.seq+1.96*sqrt(plot.var.beta) grp.frm.1<-apply(CI.lb.seq, 2, min)*0.99 grp.frm.2<-apply(CI.ub.seq, 2, max)*1.01 op<-par(mfrow=c(ceiling((cvt.length+1)/2), 2)) for(k in 1:(cvt.length+1)) { plot(plot.tau.seq, plot.beta.seq[, k], ylim=c(grp.frm.1[k], grp.frm.2[k]), type="s", xlab="tau", ylab="Coefficient", main=char.cvt[k]) lines(plot.tau.seq, CI.lb.seq[,k], type="s", lty=3) lines(plot.tau.seq, CI.ub.seq[,k], type="s", lty=3) lines(plot.tau.seq, rep(0, length(plot.tau.seq)), lty=2) } par(op)