###########################################################################################
#          code for analyzing doubly censored data with known left censoring time         #
###########################################################################################

############### functions to be used in the estimation and inference procedure ############

H <- function(x){
  y <- -log(1-x)
  return(y)
}

weightf0 <- function(v,l,u){
   weight <- rep(1,length(v))
   return(weight)
}

weightf1 <- function(v,l,u){
   weight <- v>=(l+u)/2
   return(weight)
}

test.stat <- function(bb,taugrid,weightf,l,u){
   a <- tail(which(taugrid<=l),1)
   b <- which(taugrid>=u)[1]
   v1 <- c(taugrid[(a+1):(b-1)],u)
   v2 <- c(l,taugrid[(a+1):(b-1)])
   vv <- v1-v2
   weight <- weightf(v2,l,u)
   stat <- t(bb[a:(b-1)]*weight)%*%vv
   return(stat)
}

test.m <- function(m,taugrid,weightf,l,u){
   ln <- dim(m)[1]
   p <- dim(m)[2]
   tt <- rep(NA,p)
   eff <- rep(NA,p)
   for (i in 1:p)
   {
       mtemp <- m[,i]
       tt[i] <- test.stat(mtemp,taugrid,weightf,l,u)
   }
   return(tt)
}

getp <- function(stat,statstar){
   stat.se <- sqrt(apply(statstar,2,var,na.rm=T))
   ptemp1 <- pnorm(stat/stat.se)
   ptemp2 <- 1-ptemp1
   AA <- array(NA,c(1,p,2))
   AA[,,1] <- ptemp1
   AA[,,2] <- ptemp2
   pval <- 2*apply(AA,c(1,2),min)
   return(list(pval,stat.se))
}

####################### function that returns the point estimation ##################
# dat = data set to analyze
# t0 = truncation time for the conditional version of the quantile regression model;
#      to fit the unconditional version, simply set t0 = 0.
# w = the perturbation random variables, used for the resampling-based inference 
#      procedure; for the point estimation, it is set to be a vector of 1.
# g = link function 
#####################################################################################

sol.LK <- function(dat,t0,w,g,g.inv){
    
    II <- dat$X>t0
    X <- dat$X-t0
    L <- dat$L-t0
    Delta <- dat$delta
    Z <- (as.matrix(cbind(rep(1,n),dat[,!names(dat) %in% c("X","L","delta")])))*II
    g.inv.X <- rep(0,n)
    g.inv.X[X>0] <- g.inv(X[X>0])

    bhat <- array(NA,c(Ln,p))
    Y <- (Delta==1)*g.inv.X*w
    W0 <- (Delta==1)*Z*w

    Y[n+1] <- Y[n+2] <- R
    W1 <- -apply(W0,2,sum)

    j <- 1
    while (j<=Ln)
    {
          temp <- rep(0,n)
          quant <- rep(0,n)
          for (k in 1:j)
          {
               if (k>1) quant <- g(Z%*%bhat[k-1,])
               temp <- temp+(L<=quant)*(X>quant)*(H(taugrid[k])-ifelse(k==1,0,H(taugrid[k-1])))
          }
          W2 <- 2*apply(w*Z*as.vector(temp),2,sum)
          W <- rbind(W0,W1,W2)
          bhat[j,]<- rq(Y~W-1)$coefficients

          # check if the solution did converge by examining the difference of estimation at 
          # two consecutive quantiles
          if (j>=2)
          {
               if (sum(abs(bhat[j,]-bhat[j-1,])>M)>0)
               {
                    bhat[j,] <- rep(NA,p)
                    break
               }
          }
          j <- j+1
     }
     return(bhat)
}
 

#################### function that returns the analysis results ###################
# dat = data matrix that contains the dataset to analyze; should only include:
#       X: observed times (failure events or censored events)
#       L: left censoring times
#       delta: censoring indicator (1: failure; 2: left censored; 3: right censored) 
#       Z: covariate matrix
# p0 = quantile of t0 among the observed time X. For the uncondtional model p0=0
# l = lower bound of tau for the 2nd-stage inference (e.g., 0.1)
# u = upper bound of tau for the 2nd-stage inference (e.g., 0.7)
# B = number of resamples for the inference procedure (e.g., 500)
# g = link function: Q(tau|Z)=g{Z*beta(tau)}
# g.inv = inverse function of g
# graph = flag for plotting the regression coefficients (1=Yes)
##################################################################################

data.est <- function(dat,p0,l,u,B,g,g.inv,graph){   
     ################## get the point estimates of beta(tau) #####################
    
     t0 <- ifelse(p0==0,0,quantile(dat$X,p0))
     bhat <- sol.LK(dat,t0,rep(1,n),g,g.inv)
     
     ################### start the resampling-based inference procedure ##########
    
     bstar <- ddstar <- array(NA,c(Ln,p,B))
     gamma1.star <- gamma2.star <- effavg.star <- array(NA,c(B,p))
    
     for (r in 1:B)
     {
        set.seed(r)
        zeta <- rexp(n,1)
        bstar[,,r] <- sol.LK(dat,t0,zeta,g,g.inv)
        
        gamma1.star[r,] <- test.m(bstar[,,r],taugrid,weightf0,l,u)    
        effavg.star[r,] <- gamma1.star[r,]/(u-l)                        
        ddstar[,,r] <- bstar[,,r]-rep(1,Ln)%*%t(effavg.star[r,])
        gamma2.star[r,] <- test.m(ddstar[,,r],taugrid,weightf1,l,u)    
        
        if (r %in% c(B*(1:10)/10) ) 
        {
           print(paste(100*r/B,"% of the resampling is completed"))
        }
     }

     ############## variance estimates for beta(tau) from resampling ############
     bhat.var <- apply(bstar,c(1,2),var,na.rm=T)
     bhat.se <- sqrt(bhat.var)                        

     ############# start testing for overall significance and constancy #########

     gamma1 <- test.m(bhat,taugrid,weightf0,l,u)    # test stat for overall significance
     gamma1.se <- getp(gamma1,gamma1.star)[[2]]     # se of gamma1
     gamma1.p <- getp(gamma1,gamma1.star)[[1]]      # p-value for the overall significance
     eff.avg <- gamma1/(u-l)                        # average covariate effect
     eff.avg.se <- gamma1.se/sqrt(u-l)              # se of average covariate effect
     dd <- bhat-rep(1,Ln)%*%t(eff.avg)
     gamma2 <- test.m(dd,taugrid,weightf1,l,u)      # test stat for constancy     
     gamma2.se <- getp(gamma2,gamma2.star)[[2]]     # se of gamma2
     gamma2.p <- getp(gamma2,gamma2.star)[[1]]      # p-value for the contancy
    
     ######################## plot the estimated regression quantiles ###########
     ### all coefficients are plotted on the same panel, with 3 covariate per row

     if (graph==1)
     {
         l.indx <- l*100
         u.indx <- u*100
         xx <- taugrid[l.indx:u.indx]
         yy <- bhat[l.indx:u.indx,]
         yylow <- yy-1.96*bhat.se[l.indx:u.indx,]
         yyup <- yy+1.96*bhat.se[l.indx:u.indx,]
         xxname <- c("Intercept",paste("Z",1:(p-1)))
         layout(t(array(c(1:(ceiling(p/3)*3)),c(3,ceiling(p/3)))))
         for (i in 1:p)
         {
              ylim.low <- min(yylow[,i])-0.1
              ylim.up <- max(yyup[,i])+0.1
              plot(xx,yy[,i],xlab="tau",ylab="regression quantile",ylim=c(ylim.low,ylim.up),type="s")
              points(xx,yylow[,i],pch=20)
              points(xx,yyup[,i],pch=20)
              abline(h=0,col="grey")
          title(xxname[i])
         }
     }

     return(list(coeff=bhat,coeff.se=bhat.se,avg.effect=list(avg=eff.avg,avg.se=eff.avg.se)#
                 test.sig=list(test.stat=gamma1,test.se=gamma1.se,p.val=gamma1.p), # 
                 test.const=list(test.stat=gamma2,test.se=gamma2.se,p.val=gamma2.p)))
}

###################################################################################
#                                  a data example                                 #
###################################################################################

############################  structure of the data ###############################
# columns of covariates ( "z1" and "z2" in the example), and
# 3 columns of the time variables, which must be named as follows: 
# "X": observed time; 
# "L": left censoring time; 
# "delta": censoring indicator (1=failure, 2=left censored, 3=right censored)
##################################################################################

dat <- read.csv("F:/Double Censoring/code_for_analysis/data_example.csv",sep=",")

R <<- 1e+8
M <<- 10
taugrid <<- seq(0.01,0.8,0.01)                         # grid of tau
Ln <<- length(taugrid)                                 # size of the grid
n <<- dim(dat)[1]
Z <- cbind(rep(1,n),dat[,!names(dat) %in% c("X","L","delta")])
p <<- dim(Z)[2]
p0 <- 0                                                # fit the unconditional model
l <- 0.1                                               # lower bound of tau of interest
u <- 0.7                                               # upper bound of tau of interest
B <- 500                                               # number if resamples for inference
g <- exp                                               # link function
g.inv <- log                                           # inverse function of g
graph <- 1                                             # a graph will be displayed


res <- data.est(dat,p0,l,u,B,g,g.inv,graph)            # data.est is the function for analysis

res$coeff        ### point estimate of beta(tau)
res$ceoff.se     ### se of beta(tau)
res$avg.effect   ### average covariate effects over the tau range [l,u]
res$test.sig     ### results for the overall significance test over the tau range [l,u]
res$test.const   ### results for the constancy test over the tau range [l,u]