#############################################################################################
##Investigating hospital heterogeneity with a multi-state frailty model: 
##application to nosocomial pneumonia disease in intensive care unit"
##              by B. Liquet, J.F. Timsit and V. Rondeau
#############################################################################################

# We propose two main functions : Simul.semi.markov and Analysis.multi.state
# Simul.semi.markov: Create a simple simulated data set
# Analysis.multi.state: perform a multi-state model based on the package "frailtypack"

require(frailtypack)


#############################################################################################
##       Create a simple simulated data set function: Simul.semi.markov
##       With a Weibull distribution for each transition with two explanatory variables
##       v1 and v2 follow Bernouilli distribution with probability 0.5
#############################################################################################

#############################################################################################
### Example: one run of the simulation corresponding to table 4: 
#############################################################################################
G <- 30 # number of groups
ni <- 150  # number of patients per group
par01<-c(1.3,15);par02<-c(1.3,35);par03<-c(1.25,15);par12<-c(1.3,45);par13<-c(1.25,41)
### Weibull parameters for each transition function
coef01<-c(0.8,1);coef02<-c(.6,1.2);coef03<-c(1.3,.3);coef12<-c(.7,1.1);coef13<-c(0.6,1.2)
### coefhk: true effects of the explanatory variable v1 and v2
p <- 0.5 # parameter of the Bernouilli for v1 and v2
gamma <- 0.15 # frailty variance 
paragamma <- c(1/gamma,gamma) #parameter for the gamma distribution for the frailties terms 
	  
loi01<-list(par=par01,coef=coef01)
loi02<-list(par=par02,coef=coef02)
loi03<-list(par=par03,coef=coef03)
loi12<-list(par=par12,coef=coef12)
loi13<-list(par=par13,coef=coef13)
###############################################################################################

### ========================================== function Simul.semi.markov ==================###
###   Simul.semi.markov: function which generate data from a semi-markov multi-state model

Simul.semi.markov <-function(G=30,ni=150,paragamma=paragamma,p=0.5,loi01,loi02,loi03,loi12,loi13){
           vecnj <- rep(ni,G)
           n <- sum(vecnj)  # sample size of the generated data
           pass01<-rep(0,n) # pass01 indicates if subject transit from 0 to 1 at time "time0"
           pass02<-rep(0,n) # pass01 indicates if subject transit from 0 to 2 at time "time0"
           pass03<-rep(0,n) # pass01 indicates if subject transit from 0 to 3 at time "time0"
           pass12<-rep(0,n) # pass12 indicates if subject transit from 1 to 2 at time "time1"
           pass13<-rep(0,n) # pass13 indicates if subject transit from 1 to 3 at time "time1"

           v1<-rbinom(n,1,p) # generates the values of the variable v1
           v2<-rbinom(n,1,p) # generates the values of the variable v2

          obs<-c(1:n);time0<-rep(0,n);time1<-rep(0,n);centre <- rep(0,n)
          ## database: initialization at 0
          database<-data.frame(obs,TIME0=time0,PASS01=pass01,v1=rep(0,n),v2=rep(0,n),PASS02=pass02,PASS03=pass03,TIME1=time1,PASS12=pass12,PASS13=pass13,IDCENTRE=centre)
          ##time0 represent the time of the first jump from 0 to k
          ##time1 represent the time spend in the state 1 before jumping from 1 to k
          centrej <- 0
          i <- 0
          for(nj in vecnj){
          
            u01 <- rgamma(1,shape= paragamma[1],scale=paragamma[2])
            u02 <- rgamma(1,shape= paragamma[1],scale=paragamma[2])
            u03 <- rgamma(1,shape= paragamma[1],scale=paragamma[2])
            u12 <- rgamma(1,shape= paragamma[1],scale=paragamma[2])
            u13 <- rgamma(1,shape= paragamma[1],scale=paragamma[2])
            centrej <- centrej+1
            
            for (k in 1:nj){
            i <- i+1
            t01<- gene.weibull(loi01$par,u01*exp(loi01$coef[1]*v1[i]+loi01$coef[2]*v2[i]))
            t02<- gene.weibull(loi02$par,u02*exp(loi02$coef[1]*v1[i]+loi02$coef[2]*v2[i]))
            t03<- gene.weibull(loi03$par,u03*exp(loi03$coef[1]*v1[i]+loi03$coef[2]*v2[i]))
            tk<-c(t01,t02,t03) ## 3 simulated times for the 3 transitions 01 02 03
            
            if(which.min(tk)==1){ ## transition 01,
            
                      t12<- gene.weibull(loi12$par,u12*exp(loi12$coef[1]*v1[i]+loi12$coef[2]*v2[i]))
                      t13<- gene.weibull(loi13$par,u13*exp(loi13$coef[1]*v1[i]+loi13$coef[2]*v2[i]))
                      rtt<-c(t12,t13) ## 2 simulated times for the transitions 12 13


                      if(which.min(rtt)==1){ ## then transition 12.
                        bh<-c(i,tk[1],1,v1[i],v2[i],0,0,t12,1,0,centrej)
                      }else{  ## then transition 13.
                        bh<-c(i,tk[1],1,v1[i],v2[i],0,0,t13,0,1,centrej)
                      }
            database[i,]<-bh
            }
                    
            if(which.min(tk)==2){ ## transition 02.
              bh<-c(i,tk[2],0,v1[i],v2[i],1,0,0,0,0,centrej)
              database[i,]<-bh
              }
              if(which.min(tk)==3){ ## transition 03.
                    bh<-c(i,tk[3],0,v1[i],v2[i],0,1,0,0,0,centrej)
                    database[i,]<-bh
              }

             }}
           
          return(database)
          
         }

### ==== End function Simul.semi.markov =================================###

##############################################################################################
## gene.weibull generates survival time fom a proportional hazard weibull intensity
## X represents the predictor : exp(beta1*v1+beta2*v2)
## para: vector of the parameter of the weibull baseline intensity

          gene.weibull <- function(para,X){
              b <- para[2]
              a <- para[1]
              return(b*((-(1/X)*log(1-runif(1)))**(1/a)))
              }
###############################################################################################

### ======================================= function analysis.multi-state ==================###
##         Analysis.multi-state: function which perform a multi-state model
##         non-homogeneous markov ("non-homogeneous") or semi-markov model ("semi")
##         parametric (Weibull) estimation or penalized likelihood approach (penalized)

Analysis.multi.state <- function(data=mydata,model="semi",estimator="penalized"){
n <- dim(data)[1]
data1<-data[which(data$PASS01==1),] ## data include observation which have transit from 0 to 1
n.VAP <- dim(data1)[1]

if(estimator=="penalized"){
res01 <-frailtyPenal(Surv(TIME0,PASS01)~ as.factor(v1)+v2+cluster(IDCENTRE),data=data,cross.validation=TRUE,n.knots=8,kappa1=1000,Frailty=TRUE,hazard="Splines")
res02 <-frailtyPenal(Surv(TIME0,PASS02)~ as.factor(v1)+v2+cluster(IDCENTRE),data=data,cross.validation=TRUE,n.knots=8,kappa1=1000,Frailty=TRUE,hazard="Splines")
res03 <-frailtyPenal(Surv(TIME0,PASS03)~ as.factor(v1)+v2+cluster(IDCENTRE),data=data,cross.validation=TRUE,n.knots=8,kappa1=1000,Frailty=TRUE,hazard="Splines")

if(model=="semi"){
### SEMI-MARKOV ###
res12 <-frailtyPenal(Surv(TIME1,PASS12)~ as.factor(v1)+v2+cluster(IDCENTRE),data=data1,cross.validation=TRUE,n.knots=8,kappa1=1000,Frailty=TRUE,hazard="Splines")
res13 <-frailtyPenal(Surv(TIME1,PASS13)~ as.factor(v1)+v2+cluster(IDCENTRE),data=data1,cross.validation=TRUE,n.knots=8,kappa1=1000,Frailty=TRUE,hazard="Splines")
}else{
#### MARKOV NON-HOMOGENEOUS ###
res12 <-frailtyPenal(Surv(TIME0,TIME0+TIME1,PASS12)~as.factor(v1)+v2+cluster(IDCENTRE),data=data1,cross.validation=TRUE,n.knots=8,kappa1=1000,Frailty=TRUE,hazard="Splines")
res13 <-frailtyPenal(Surv(TIME0,TIME0+TIME1,PASS13)~as.factor(v1)+v2+cluster(IDCENTRE),data=data1,cross.validation=TRUE,n.knots=8,kappa1=1000,Frailty=TRUE,hazard="Splines")
}
LCV <- ((res01$LCV+res01$LCV+res01$LCV)*n+(res12$LCV+res13$LCV)*n.VAP)/n
}else{
res01 <-frailtyPenal(Surv(TIME0,PASS01)~ as.factor(v1)+v2+cluster(IDCENTRE),data=data,Frailty=TRUE,hazard="Weibull")
res02 <-frailtyPenal(Surv(TIME0,PASS02)~ as.factor(v1)+v2+cluster(IDCENTRE),data=data,hazard="Weibull")
res03 <-frailtyPenal(Surv(TIME0,PASS03)~ as.factor(v1)+v2+cluster(IDCENTRE),data=data,hazard="Weibull")

if(model=="semi"){
res12 <-frailtyPenal(Surv(TIME1,PASS12)~ as.factor(v1)+v2+cluster(IDCENTRE),data=data1,Frailty=TRUE,hazard="Weibull")
res13 <-frailtyPenal(Surv(TIME1,PASS13)~ as.factor(v1)+v2+cluster(IDCENTRE),data=data1,Frailty=TRUE,hazard="Weibull")
}else{
res12 <-frailtyPenal(Surv(TIME0,TIME0+TIME1,PASS12)~as.factor(v1)+v2+cluster(IDCENTRE),data=data1,Frailty=TRUE,hazard="Weibull")
res13 <-frailtyPenal(Surv(TIME0,TIME0+TIME1,PASS13)~as.factor(v1)+v2+cluster(IDCENTRE),data=data1,Frailty=TRUE,hazard="Weibull")
}
LCV <- ((res01$AIC+res01$AIC+res01$AIC)*n+(res12$AIC+res13$AIC)*n.VAP)/n
}
res <- list(res01=res01,res02=res02,res03=res03,res12=res12,res13=res13,LCV.multi.state=LCV)
return(res)
}
### ==================End function Analysis.multi-state =======================================================###

###################################################################################################################
##  Example on a simulated data set
##  We perform the 4 model: semi-Markov (parametric and semi-parametric); 
##                          Markov non-homogeneous (paremetric and semi-parametric  
##  We present also a table to choose the best model based on the LCV criterion
###################################################################################################################

set.seed(133)

mydata <- Simul.semi.markov(G=30,ni=150,paragamma=paragamma,p=0.5,loi01,loi02,loi03,loi12,loi13)

model.semi.markov.penalized <- Analysis.multi.state(data=mydata,model="semi",estimator="penalized")

model.non.homogeneous.penalized <- Analysis.multi.state(data=mydata,model="non-homogeneous",estimator="penalized")

model.semi.markov.weibull <- Analysis.multi.state(data=mydata,model="semi",estimator="weibull")

model.non.homogeneous.weibull <- Analysis.multi.state(data=mydata,model="non-homogeneous",estimator="weibull")


###table for the choice of the model
tab <- matrix(c(model.non.homogeneous.weibull$LCV,model.semi.markov.weibull$LCV,model.non.homogeneous.weibull$LCV-model.semi.markov.weibull$LCV
,model.non.homogeneous.penalized$LCV,model.semi.markov.penalized$LCV ,model.non.homogeneous.penalized$LCV-model.semi.markov.penalized$LCV,
model.non.homogeneous.weibull$LCV-model.non.homogeneous.penalized$LCV,model.semi.markov.weibull$LCV-model.semi.markov.penalized$LCV ,NA
),nrow=3,ncol=3,byrow=T)
rownames(tab) <- c("parametric","semi-parametric","difference")
tab <- as.data.frame(tab)
colnames(tab) <- c("Non-homogeneous","Semi-Markov","difference")

print(tab)