#####################################################
###  Dependence Measure for Semi-competing Risks  ###              
###  for academic research only                   ###       
###  No covariates are involved                   ###      
#####################################################
library(survival)
library(MASS)  
library(quantreg)

#######################################################
### DATA FORMAT
### 1st column: observed non-terminal event time X
### 2nd column: observed terminal event time Y
### 3rd column: left truncation time L
### 4rd column: dependent censoring status (0 is censored)
### 5th column: independent censoring status (0 is censored)
### Note: The program may not handle missing data
#######################################################

### save data to object "da0" according the format stated above 
da0=as.data.frame(read.csv("testdat.csv", header=T))
names(da0)=c("X","Y","L0","delta","eta")
da0$D=da0$Y-da0$L0
da0=da0[with(da0,order(D)),] 
num=nrow(da0)
max.lmt=10^7

### K-M estimate for D starts
data.censor=data.frame(y=da0$D,cens=1-da0$eta)
ft=survfit(Surv(y,cens)~1,data=data.censor,conf.type="none")
if(min(summary(ft)$surv)==0){
  censor.surv=cbind(c(1,summary(ft)$surv),c(0,summary(ft)$time))
}
if(min(summary(ft)$surv)>0){
  censor.surv=cbind(c(1,summary(ft)$surv,0),c(0,summary(ft)$time, max.lmt))
}
csurv=vector() ### K-M estimate at each D observation
for(i in 1:num)
{
  index=min(which(round(da0$D[i],10)<=round(censor.surv[,2],10)))
  if(round(da0$D[i],10)<round(censor.surv[index,2],10)){
    csurv[i]=censor.surv[index-1,1]
  }
  if(round(da0$D[i],10)==round(censor.surv[index,2],10)){
    csurv[i]=censor.surv[index,1]
  }
}
### K-M estimate for D ends

### Set-ups
cvt.length=1
char.cvt=c("Intercept","LCQQR")

tau=0.5  ### chosen tau

t0.L=0.55 ### lower bound of t0 range for the trimmed mean effect       
t0.U=1 ### upper bound of t0 range for the trimmed mean effect
t0.step=0.01  ### size of grid
t0.seq=seq(t0.L, t0.U, t0.step)
L.t0.seq<-length(t0.seq) 
t0.w1<-(as.numeric(t0.seq<(t0.L+t0.U)/2))[1: (L.t0.seq-1)] ### weight function for constancy test

### point estimation starts
est.beta.seq=NULL #estimate of betas, a L.t0.seq by (cvt.length+1) matrix
est.var.seq=NULL #variance of betas, a L.t0.seq by (cvt.length+1) matrix   
inf.est.func=NULL 
ind.conv=1 
ind.conv.var=rep(1,cvt.length+1) 
curr.inf=array(rep(0,num*(cvt.length+1)), c(num, cvt.length+1))
rank=cvt.length+1
L.bsq=1 

csurv.x=csurv
if(length(which(csurv.x==0))>0)
{
  csurv.x[which(csurv.x==0&da0$eta==0)]=1
}

while(L.bsq<=L.t0.seq&ind.conv==1&mean(ind.conv.var)==1&rank==(cvt.length+1))
{
  t0=t0.seq[L.bsq]
  x=as.numeric(da0$X>t0) 
  z=cbind(rep(1,num),x)
    
  da=data.frame(L0=da0$L0,Y=da0$Y,D=da0$D,eta=da0$eta,z)
  sub.index=which(da$eta==0)
  sub.da=da[sub.index,]  
    
  new.Y=da$Y 
  new.Y[which(da$Y<=t0)]=t0+1
  pseudo.resp=c(as.numeric(da$L0<=t0)*as.numeric(da$Y>t0)*as.numeric(da$eta==1)*log(new.Y-t0)/csurv.x, max.lmt)  
  da.cvt=as.matrix(z)
  n.cvt=(2*tau-1)*(t(da.cvt))%*%(as.numeric(da$L0<=t0)*as.numeric(da$Y>t0)*as.numeric(da$eta==1)/csurv.x)  
  pseudo.cvt=rbind(da.cvt*(as.numeric(da$L0<=t0)*as.numeric(da$Y>t0)*as.numeric(da$eta==1)/csurv.x), as.vector(n.cvt))
  rank=qr(pseudo.cvt)$rank
  if(rank!=(cvt.length+1))
  {
    print(paste("rq() DO NOT have full rank at", t0))
  }
  else{
    est.beta.obj=rq.fit.br(pseudo.cvt, pseudo.resp, ci=F)
    est.beta<-est.beta.obj$coef 
    ind.conv<-as.numeric(abs(est.beta.obj$residuals[num+1])>10^6)
    
    if(ind.conv==1) 
    {
      est.beta.seq=rbind(est.beta.seq, est.beta) 
      tmp.1=(as.numeric(da$Y<=t0+exp(da.cvt%*%as.vector(est.beta)))-tau)*as.numeric(da$L0<=t0)*as.numeric(da$Y>t0)*as.numeric(da$eta==1)/csurv.x*da.cvt
      
      tmp.2=matrix(0,nrow=num,ncol=(cvt.length+1))        
      tmp2.4=NULL      
      for(j in 1:nrow(sub.da)){
        tmp2.1=da.cvt*(as.numeric(da$D>=sub.da$D[j])*as.numeric(da$L0<=t0)*as.numeric(da$Y>t0)*as.numeric(da$eta==1)*(as.numeric(da$Y<=t0+exp(da.cvt%*%as.vector(est.beta)))-tau)/csurv.x)
        tmp2.2=apply(tmp2.1,2,sum)
        tmp2.3=sum(as.numeric(da$D>=sub.da$D[j]))
        tmp2.4=rbind(tmp2.4,tmp2.2/tmp2.3)
      }
      tmp.2[sub.index,]=tmp2.4

      zeta=tmp.1-tmp.2
      var.matrix=t(zeta)%*%zeta/num   ### sigmahat    
      sigma.sqrt=eigen(var.matrix)$vectors %*%diag(sqrt(eigen(var.matrix)$values))%*% t(eigen(var.matrix)$vectors) 

      est.beta.var=NULL
      ind.conv.var=NULL
      for(k in 1:(cvt.length+1)) 
      {
        pseudo.cvt.var=rbind(da.cvt*as.numeric(da$L0<=t0)*as.numeric(da$Y>t0)*as.numeric(da$eta==1)/csurv.x, as.vector(n.cvt)+2*sqrt(num)*sigma.sqrt[,k])
        est.beta.var.obj=rq.fit.br(pseudo.cvt.var, pseudo.resp, ci=F)
        ind.conv.var<-c(ind.conv.var, as.numeric(abs(est.beta.var.obj$residuals[num+1])>10^6))
        est.beta.var<-cbind(est.beta.var, est.beta.var.obj$coef-est.beta) 
      }
 
      if(mean(ind.conv.var)==1) 
      {
        est.cov=est.beta.var%*%t(est.beta.var)
        est.var=diag(est.cov)
        est.inv.deriv.matrx=est.beta.var%*%solve(sigma.sqrt)*sqrt(num)
        curr.inf=t(est.inv.deriv.matrx%*%t(zeta))

        est.var.seq=rbind(est.var.seq, est.var)    
        inf.est.func=cbind(inf.est.func, curr.inf) 
        L.bsq=L.bsq+1
      }     
    } 
  } 
} 
rownames(est.beta.seq)=colnames(est.beta.seq)=NULL
row.names(est.var.seq)=colnames(est.var.seq)=NULL
### point estimation ends

### second-stage inferences start
ind.conv==1&&mean(ind.conv.var)==1&rank==(cvt.length+1) ## TRUE

inf.ave=NULL
est.ave=NULL
inf.cnst.test.w1=NULL 
cnst.test.w1=NULL 
for(k in 1:(cvt.length+1)) 
{ 
   est.ave=c(est.ave, sum(est.beta.seq[1: (L.t0.seq-1), k]*t0.step)/(t0.U-t0.L))
   inf.ave=cbind(inf.ave, apply(inf.est.func[, seq(k, (L.t0.seq-1)*(cvt.length+1), cvt.length+1)]*t0.step, 1, sum)/(t0.U-t0.L)) 
   cnst.test.w1=c(cnst.test.w1, sum((est.beta.seq[1: (L.t0.seq-1), k]-est.ave[k])*t0.w1*t0.step)/sum(t0.w1*t0.step))
   inf.cnst.test.w1=cbind(inf.cnst.test.w1, apply(t(t(inf.est.func[, seq(k, (L.t0.seq-1)*(cvt.length+1), cvt.length+1)]-inf.ave[,k])*t0.w1)*t0.step, 1, sum)/sum(t0.w1*t0.step)) 
}

var.ave<-apply(inf.ave^2/num, 2, mean) 
p.signf.test=1-pchisq(est.ave^2/var.ave,1)
  
var.cnst.test.w1<-apply(inf.cnst.test.w1^2/num, 2, mean)
p.cnst.test=1-pchisq(cnst.test.w1^2/var.cnst.test.w1,1)
### second-stage inferences end



###############################################
### summarize results                       ###
###############################################
output1=data.frame(Coef=rep(char.cvt,each=L.t0.seq),t0=rep(t0.seq,cvt.length+1),Estimates=c(est.beta.seq),AveSE=sqrt(c(est.var.seq)))
output1[,3:4]=round(output1[,3:4],3)

output2=data.frame(Coef=char.cvt,Ave_eff=est.ave,Se_ave_eff=sqrt(var.ave),P_sig_test=p.signf.test,P_cnst_test=p.cnst.test)
output2[,2:3]=round(output2[,2:3],3)
output2[,4:5]=round(output2[,4:5],4)

### output results 
output1  # point estiamtes and SE
output2  # second-stage inferences

## Ave_eff: Trimmed mean effect 
## Se_ave_eff: SE of the trimmed mean effect 
## P_sig_test: p-value for overall significance test
## P_cnst_test: p-valuce for constancy test with weight t0.w1

###############################################
### plot                                    ###
###############################################
CI.lb.seq=est.beta.seq-1.96*sqrt(est.var.seq)
CI.ub.seq=est.beta.seq+1.96*sqrt(est.var.seq)

grp.frm.1<-apply(CI.lb.seq, 2, min)*0.99
grp.frm.2<-apply(CI.ub.seq, 2, max)*1.01
op<-par(mfrow=c(ceiling((cvt.length+1)/2), 2))
for(k in 1:(cvt.length+1)) 
{  
  plot(t0.seq, est.beta.seq[, k], ylim=c(grp.frm.1[k], grp.frm.2[k]), type="s", xlab=expression(t[0]), ylab="Coefficient", main=char.cvt[k])
  lines(t0.seq, CI.lb.seq[,k], type="s", lty=3)
  lines(t0.seq, CI.ub.seq[,k], type="s", lty=3)
  lines(t0.seq, rep(0, L.t0.seq))
  lines(t0.seq, rep(est.ave[k], L.t0.seq),lty=2)  
}