library(quantreg)
library(data.tree)   ### the data.tree package is used by the fast quantile
                     ### process regression function rqp.tree()

rm(list = ls())

##########################################################################
# Fast quantile process regression.
# The algorithm rqp.tree() below was written by Yonggang Yao, a programmer
# at SAS Institute Inc.  The corresponding fast quantile regression
# computation method is the subject of a United States patent application,
# U.S. Patent Application Number 20180181541.
##########################################################################
rqp.tree = function(formula, level = 8, data, subset, weights, na.action,
                    method = "br", model = TRUE, contrasts = NULL, ...)
{
  call <- match.call()
  mf <- match.call(expand.dots = FALSE)
  m <- match(c("formula", "data", "subset", "weights", "na.action"), names(mf), 0)
  mf <- mf[c(1, m)]
  mf$drop.unused.levels <- TRUE
  mf[[1]] <- as.name("model.frame")
  mf <- eval.parent(mf)
  if (method == "model.frame") return(mf)
  mt <- attr(mf, "terms")
  weights <- as.vector(model.weights(mf))
  Y <- model.response(mf)
  if (method == "sfn") {
    if (requireNamespace("MatrixModels") && requireNamespace("Matrix")) {
      X <- MatrixModels::model.Matrix(mt, data, sparse = TRUE)
      vnames <- dimnames(X)[[2]]
      X <- as(X, "matrix.csr")
      mf$x <- X
    }
  } else {
    X <- model.matrix(mt, mf, contrasts)
  }
  eps <- .Machine$double.eps^(2/3)
  tolr = 1e-5
  nPar = ncol(X)   # print(nPar)
  nObs = nrow(X)

  # Allocate the coefficient matrix and the objective values:
  # one row per grid point tau = k/2^level, k = 1, ..., 2^level - 1.
  nTauB = 2^level - 1
  qpBeta = matrix(nrow = nTauB, ncol = nPar)
  qpObj = vector(length = nTauB)

  # Root node of the binary tau-tree, placed at the median (index 2^(level-1)).
  tau_id = toString(2^(level - 1))
  rnode = Node$new(tau_id)
  if (length(weights)) {
    if (any(weights < 0)) {
      stop("negative weights not allowed")
    } else {
      rnode$X = X * weights
      rnode$Y = Y * weights
    }
  } else {
    rnode$X = X
    rnode$Y = Y
  }
  # XL/YL and XU/YU hold the aggregated lower and upper pseudo-observations.
  rnode$XL = matrix(0, ncol = nPar, nrow = 1)
  rnode$XU = matrix(0, ncol = nPar, nrow = 1)
  rnode$YL = 0
  rnode$YU = 0
  rm(X, Y)

  cnt = 0
  for (i in 1:level)
  {
    nTauB = 2^(i - 1)
    base = 2 * nTauB
    for (j in 1:nTauB)
    {
      ctimeCopy1 = Sys.time()
      cnt = cnt + 1
      tau_idx = (2 * j - 1) * 2^(level - i)
      tau_id = toString(tau_idx)
      pnode = FindNode(rnode, tau_id)
      pnode$tau = tau_idx / 2^level
      tau = tau_idx / 2^level

      # Fit the tau-th quantile regression on the node's (compressed) data,
      # with the aggregated pseudo-observations appended as extra rows.
      X = rbind(pnode$X, pnode$XL, pnode$XU)
      Y = c(pnode$Y, pnode$YL, pnode$YU)
      pnode$fitted <- rq.fit(X, Y, tau = tau, method = method, ...)
      qpBeta[tau_idx, ] = pnode$fitted$coefficients
      qpObj[tau_idx] = (tau - 0.5) * sum(pnode$fitted$residuals) +
                       0.5 * sum(abs(pnode$fitted$residuals))

      if (i < level)
      {
        cnObs = nrow(pnode$X)

        # Allocate left child node (smaller tau): observations clearly above
        # the current fit stay above the lower quantile fits, so they can be
        # collapsed into the upper pseudo-observation.
        tau_id = toString(tau_idx - 2^(level - i - 1))
        cnode = Node$new(tau_id)
        cnode$XL = pnode$XL
        cnode$YL = pnode$YL
        cidx = pnode$fitted$residuals[1:cnObs] > tolr
        if ((sum(!cidx) + 3 * nPar) < cnObs)
        {
          cnode$X = pnode$X[!cidx, ]
          cnode$Y = pnode$Y[!cidx]
          cnode$XU = pnode$XU + colSums(pnode$X[cidx, ])
          cnode$YU = pnode$YU + sum(pnode$Y[cidx])
        } else {
          cnode$X = pnode$X
          cnode$Y = pnode$Y
          cnode$XU = pnode$XU
          cnode$YU = pnode$YU
        }
        pnode$AddChildNode(cnode)

        # Allocate right child node (larger tau): observations clearly below
        # the current fit stay below the higher quantile fits, so they can be
        # collapsed into the lower pseudo-observation.
        tau_id = toString(tau_idx + 2^(level - i - 1))
        cnode = Node$new(tau_id)
        cidx = pnode$fitted$residuals[1:cnObs] >= -tolr
        cnode$XU = pnode$XU
        cnode$YU = pnode$YU
        if ((sum(cidx) + 3 * nPar) < cnObs)
        {
          cnode$X = pnode$X[cidx, ]
          cnode$Y = pnode$Y[cidx]
          cnode$XL = pnode$XL + colSums(pnode$X[!cidx, ])
          cnode$YL = pnode$YL + sum(pnode$Y[!cidx])
        } else {
          cnode$X = pnode$X
          cnode$Y = pnode$Y
          cnode$XL = pnode$XL
          cnode$YL = pnode$YL
        }
        pnode$AddChildNode(cnode)
      }
    }
  }
  list(Beta = qpBeta, Obj = qpObj, tree = rnode)
}
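
##########################################################################
# A minimal usage sketch of rqp.tree() on hypothetical simulated data
# (the data, seed, and level below are illustrative assumptions, not part
# of the simulation study): the coefficient path on the dyadic tau grid is
# compared with repeated quantreg::rq() fits on the same grid.
##########################################################################
set.seed(100)
x.demo <- runif(400)
y.demo <- 1 + 2 * x.demo + (0.5 + x.demo) * rnorm(400)   # heteroscedastic toy model
taus.demo <- seq(1, 2^5 - 1) / 2^5
fit.tree <- rqp.tree(y.demo ~ x.demo, level = 5)          # Beta: (2^5 - 1) x 2 matrix
fit.rq   <- rq(y.demo ~ x.demo, tau = taus.demo)          # reference fits from quantreg
max(abs(t(fit.tree$Beta) - coef(fit.rq)))                 # discrepancy between the two paths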
##########################################################################
# Simulation study: quantile regression for recurrent event data, fitted
# with rqp.tree() on a dyadic tau grid via an iterative (EM-type) algorithm.
##########################################################################

########################################################################
# the function to estimate the baseline function mu0 at time t from the
# pooled recurrent event times TT and their matched censoring times CC
########################################################################
estmu0 <- function(TT, CC, t)
{
  ## TT, CC: pooled event times and matched censoring times over all subjects
  ## t: the time point at which mu0 is evaluated
  mu0 = 1
  tmpu = TT[TT >= t]
  nu = length(tmpu)
  if (nu >= 1) {
    denom = rep(1, times = nu)
    for (j in 1:nu) {
      denom[j] = sum( (tmpu[j] <= CC) - (tmpu[j] <= TT) )
    }
    denom[which(denom == 0)] = 100000   # guard against empty risk sets
    mu0 = exp( -sum(1/denom) )
  }
  return(mu0)
}

########################################################################
# the function to estimate \rho(m | gamma, C; hatmu0)
########################################################################
rhodistri <- function(gam, m, hatmu0)
{
  ## gam: n*J matrix of rate values
  ## m (recurrent event counts) and hatmu0 are n*1 vectors
  ## output is an n*J matrix of Poisson probabilities
  n <- length(m)
  J <- length(gam[1, ])
  m2 <- matrix( rep(m, J), nrow = n )
  C2 <- matrix( rep(hatmu0, J), nrow = n )
  (gam*C2)^m2 * exp(-gam*C2) / gamma(m2 + 1)
}

#########################################################################
# the intermediate step to estimate beta
#########################################################################
estbetam <- function(m, C, X, betam, esttau, hatmu0)
{
  ## X: covariate matrix
  ## betam: the current value of beta (p x ntau)
  ## output is a p x ntau matrix
  eXbeta <- exp(X %*% betam)                             ## n x ntau matrix
  esttau2 <- matrix( rep(esttau, each = n), nrow = n )   ## n x ntau matrix
  gdisgam <- (esttau2[, 2:ntau] - esttau2[, 1:(ntau-1)]) /
             (eXbeta[, 2:ntau] - eXbeta[, 1:(ntau-1)])
  rhodism <- rhodistri(gam = eXbeta[, 1:(ntau-1)], m = m, hatmu0)   ## n x (ntau-1)
  tmp <- rhodism * (esttau2[, 2:ntau] - esttau2[, 1:(ntau-1)])
  hdisgam <- tmp / matrix(rep(apply(tmp, 1, sum), ntau-1), nrow = n)
  weight <- as.vector(hdisgam)          ## vector of length n*(ntau-1)
  ly <- as.vector( eXbeta[, 1:(ntau-1)] )
  lX <- kronecker(matrix(1, ntau-1, 1), X)
  # rq( log(ly)~lX-1, esttau, weights=weight, method="fn")$coeff
  coef = rqp.tree( log(ly)~lX-1, level = 7, weights = weight, method = "fn" )$Beta
  t( coef )
}

###########################################################################
# the iteration to get the final estimator of beta
###########################################################################
emest <- function(m, C, X, esttau, hatmu0, tol, betam0, Nem)
{
  e <- 1
  betam1 <- betam0
  estmm <- array( 0, dim = c(Nem, p, ntau) )
  while (e <= Nem) {
    betam2 <- estbetam(m, C, X, betam = betam1, esttau, hatmu0)
    estmm[e, , ] <- betam2
    e <- e + 1
    if ( sum( (betam1 - betam2)^2 ) < tol ) break
    betam1 <- betam2
  }
  return( estmm[e-1, , ] )
}

########################################################################
## the function to generate data
########################################################################
data.generate <- function(n, b0, b1, b2, bc, type)
{
  X1 <- runif(n, 0, 1)
  X2 <- rbinom(n, 1, 0.5)
  X <- cbind(1, X1, X2)
  if (type == "1")   ### homogeneous case with error ~ N(0,1)
  {
    epsilon <- rnorm(n, 0, 1)
    gam <- exp( b0 + b1*X1 + b2*X2 + bc*epsilon )
  }
  if (type == "2")   ### homogeneous case with error ~ t3
  {
    epsilon <- rt(n, 3)
    gam <- exp( b0 + b1*X1 + b2*X2 + bc*epsilon )
  }
  if (type == "3")   ### heterogeneous case with error ~ N(0,1)
  {
    epsilon <- rnorm(n, 0, 1)
    gam <- exp( b0 + b1*X1 + b2*X2 + bc*(1 + X1 + X2)*epsilon )
  }
  C <- hatgam <- m <- rep(0, times = n)
  TT = CC = IND = NULL
  mti = matrix(0, nrow = n, ncol = 200)
  for (i in 1:n)
  {
    ### The Poisson process is generated by summing independent exp(1) gaps,
    ### with subject-specific rate gam from the quantile regression model.
    ti <- cumsum( rexp(200, rate = gam[i]) )
    mti[i, ] = ti
    C[i] <- runif(1, 2/3, 1)
    m[i] <- sum( ti <= C[i] )
    if (m[i] >= 1) {
      TT = c(TT, ti[1:m[i]])
      CC = c(CC, rep(C[i], times = m[i]))
      IND = c(IND, rep(i, times = m[i]))
    }
    if (m[i] == 0) {
      TT = c(TT, C[i])
      CC = c(CC, C[i])
      IND = c(IND, i)
    }
  }
  hatmu0 = rep(1, times = n)
  for (i in 1:n)
  {
    hatmu0[i] = estmu0(TT, CC, C[i])
    ### the naive estimator of gamma, which gives the naive estimator of beta
    hatgam[i] <- max(1, m[i]) / hatmu0[i]
  }
  data = NULL
  data$m = m
  data$C = C
  data$X = X
  data$ind = 1:n
  data$IND = IND
  data$TT = TT
  data$CC = CC
  data$hatgam = hatgam
  data$hatmu0 = hatmu0
  data$mti = mti
  data
}
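
##########################################################################
# A minimal smoke test of the data generator and the naive rate estimator
# (the small n, seed, and parameter values below are illustrative
# assumptions, not the settings of the simulation study).
##########################################################################
set.seed(200)
dat.demo <- data.generate(n = 50, b0 = log(3) + 1, b1 = 1, b2 = 1, bc = 0.5, type = "1")
summary(dat.demo$m)        # observed recurrent-event counts
summary(dat.demo$hatgam)   # naive subject-specific rates max(1, m_i) / hatmu0(C_i)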
##########################################################################
## the function to get the bootstrap SE and CI
##########################################################################
boot.func <- function(data, NSBW, level)
{
  m = data$m
  C = data$C
  X = data$X
  IND = data$IND
  TT = data$TT
  CC = data$CC
  hatgam = data$hatgam
  hatmu0 = data$hatmu0
  mti = data$mti
  p = dim(X)[2]
  esttau = seq(1, 2^level - 1) / 2^level
  ntau = length(esttau)

  ## tmpm[k] gives the subject index of the k-th pooled event record
  tmpm = NULL
  for (i in 1:n) {
    tmpm = c(tmpm, rep(i, times = m[i]))
  }

  bootest = array(NA, dim = c(NSBW, p, ntau))
  for (l in 1:NSBW)
  {
    ## resample subjects with replacement and collect their event records
    bos = sample(1:n, replace = TRUE)
    BOS = NULL
    for (i in 1:n) {
      BOS = c(BOS, which( tmpm == bos[i] ))
    }
    boshatgam = boshatmu0 = rep(0, times = n)
    bosmti = mti[bos, ]
    for (i in 1:n) {
      boshatmu0[i] = estmu0(TT[BOS], CC[BOS], C[bos][i])
      boshatgam[i] <- max(1, m[bos][i]) / boshatmu0[i]
    }
    bosbetam0 = rq( log(boshatgam) ~ X[bos, ] - 1, esttau )$coeff
    bootest[l, , ] = emest(m[bos], C[bos], X[bos, ], esttau, boshatmu0,
                           tol = 0.01, bosbetam0, Nem = 500)
  }
  boot.se  = apply(bootest, 2:3, sd)
  boot.lci = apply(bootest, 2:3, function(x) quantile(x, probs = 0.025))
  boot.uci = apply(bootest, 2:3, function(x) quantile(x, probs = 0.975))
  out.boot = rbind(boot.se, boot.lci, boot.uci)
  out.boot
}

#######################################################################
## begin simulation
#######################################################################
n = 500        ### sample size
NS = 500       ### number of simulation replications
p = 3          ### number of regression coefficients (intercept + covariates)
NSBW = 100     ### number of bootstrap resamples
level = 7
esttau = seq(1, 2^level - 1) / 2^level   ### the equally spaced grid on (0,1)
ntau = length(esttau)                    ### number of grid points

### the parameters that determine the simulation setup ####
ac = 1
b0 = (log(3) + 1) * ac
b1 = b2 = 1 * ac
bc = 0.5          ### or bc = 0.1 for type = "3"
type = "1"

### true quantile regression coefficients under the specified simulation setup ###
if (type == "1") {
  true <- rbind( b0 + bc*qnorm(esttau), b1, b2 )
}
if (type == "2") {
  true <- rbind( b0 + bc*qt(esttau, 3), b1, b2 )
}
if (type == "3") {
  true <- rbind( b0 + bc*qnorm(esttau),
                 b1 + bc*qnorm(esttau),
                 b2 + bc*qnorm(esttau) )
}

naiveest_array <- firstest_array <- ESE_array <- array(0, dim = c(NS, p, ntau))
low_array <- upp_array <- array(0, dim = c(NS, p, ntau))

recu = "recu"   ### prefix of the per-replication output files (set as needed)

ctime = Sys.time()
for (k in 1:NS)
{
  data = data.generate(n, b0, b1, b2, bc, type)
  hatgam = data$hatgam
  X = data$X
  m = data$m
  C = data$C
  hatmu0 = data$hatmu0

  ## naive (initial) estimate of beta from the naive gamma estimates
  # betam0 <- rq( log(hatgam)~X-1, esttau)$coeff
  betam0 <- t( rqp.tree( log(hatgam) ~ X - 1, level = 7 )$Beta )
  naiveest_array[k, , ] <- betam0

  ## proposed estimator via the iterative algorithm
  firstest_array[k, , ] <- emest(m, C, X, esttau, hatmu0, tol = 0.01, betam0, Nem = 500)

  ## bootstrap standard errors and 95% confidence limits
  out.boot <- boot.func(data, NSBW, level)
  ESE_array[k, , ] = out.boot[1:p, ]
  low_array[k, , ] = out.boot[(p+1):(2*p), ]
  upp_array[k, , ] = out.boot[(2*p+1):(3*p), ]

  out.result = rbind( firstest_array[k, , ], naiveest_array[k, , ],
                      ESE_array[k, , ], low_array[k, , ], upp_array[k, , ] )
  write.csv(out.result, file = paste(recu, k, ".csv", sep = ""))
  print(k)
}
Sys.time() - ctime
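
##########################################################################
# A minimal post-processing sketch (the summary format is assumed, not
# prescribed above): average bias of the proposed estimator and empirical
# coverage of the 95% bootstrap intervals across the NS replications,
# using the arrays filled in the simulation loop.
##########################################################################
bias <- apply(firstest_array, 2:3, mean) - true   # p x ntau matrix of average bias
cover <- matrix(0, p, ntau)
for (j in 1:p) {
  for (k2 in 1:ntau) {
    cover[j, k2] <- mean(low_array[, j, k2] <= true[j, k2] &
                         true[j, k2] <= upp_array[, j, k2])
  }
}
round(bias, 3)
round(cover, 3)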