############################################################################################# ##Investigating hospital heterogeneity with a multi-state frailty model: ##application to nosocomial pneumonia disease in intensive care unit" ## by B. Liquet, J.F. Timsit and V. Rondeau ############################################################################################# # We propose two main functions : Simul.semi.markov and Analysis.multi.state # Simul.semi.markov: Create a simple simulated data set # Analysis.multi.state: perform a multi-state model based on the package "frailtypack" require(frailtypack) ############################################################################################# ## Create a simple simulated data set function: Simul.semi.markov ## With a Weibull distribution for each transition with two explanatory variables ## v1 and v2 follow Bernouilli distribution with probability 0.5 ############################################################################################# ############################################################################################# ### Example: one run of the simulation corresponding to table 4: ############################################################################################# G <- 30 # number of groups ni <- 150 # number of patients per group par01<-c(1.3,15);par02<-c(1.3,35);par03<-c(1.25,15);par12<-c(1.3,45);par13<-c(1.25,41) ### Weibull parameters for each transition function coef01<-c(0.8,1);coef02<-c(.6,1.2);coef03<-c(1.3,.3);coef12<-c(.7,1.1);coef13<-c(0.6,1.2) ### coefhk: true effects of the explanatory variable v1 and v2 p <- 0.5 # parameter of the Bernouilli for v1 and v2 gamma <- 0.15 # frailty variance paragamma <- c(1/gamma,gamma) #parameter for the gamma distribution for the frailties terms loi01<-list(par=par01,coef=coef01) loi02<-list(par=par02,coef=coef02) loi03<-list(par=par03,coef=coef03) loi12<-list(par=par12,coef=coef12) loi13<-list(par=par13,coef=coef13) ############################################################################################### ### ========================================== function Simul.semi.markov ==================### ### Simul.semi.markov: function which generate data from a semi-markov multi-state model Simul.semi.markov <-function(G=30,ni=150,paragamma=paragamma,p=0.5,loi01,loi02,loi03,loi12,loi13){ vecnj <- rep(ni,G) n <- sum(vecnj) # sample size of the generated data pass01<-rep(0,n) # pass01 indicates if subject transit from 0 to 1 at time "time0" pass02<-rep(0,n) # pass01 indicates if subject transit from 0 to 2 at time "time0" pass03<-rep(0,n) # pass01 indicates if subject transit from 0 to 3 at time "time0" pass12<-rep(0,n) # pass12 indicates if subject transit from 1 to 2 at time "time1" pass13<-rep(0,n) # pass13 indicates if subject transit from 1 to 3 at time "time1" v1<-rbinom(n,1,p) # generates the values of the variable v1 v2<-rbinom(n,1,p) # generates the values of the variable v2 obs<-c(1:n);time0<-rep(0,n);time1<-rep(0,n);centre <- rep(0,n) ## database: initialization at 0 database<-data.frame(obs,TIME0=time0,PASS01=pass01,v1=rep(0,n),v2=rep(0,n),PASS02=pass02,PASS03=pass03,TIME1=time1,PASS12=pass12,PASS13=pass13,IDCENTRE=centre) ##time0 represent the time of the first jump from 0 to k ##time1 represent the time spend in the state 1 before jumping from 1 to k centrej <- 0 i <- 0 for(nj in vecnj){ u01 <- rgamma(1,shape= paragamma[1],scale=paragamma[2]) u02 <- rgamma(1,shape= paragamma[1],scale=paragamma[2]) u03 <- rgamma(1,shape= paragamma[1],scale=paragamma[2]) u12 <- rgamma(1,shape= paragamma[1],scale=paragamma[2]) u13 <- rgamma(1,shape= paragamma[1],scale=paragamma[2]) centrej <- centrej+1 for (k in 1:nj){ i <- i+1 t01<- gene.weibull(loi01$par,u01*exp(loi01$coef[1]*v1[i]+loi01$coef[2]*v2[i])) t02<- gene.weibull(loi02$par,u02*exp(loi02$coef[1]*v1[i]+loi02$coef[2]*v2[i])) t03<- gene.weibull(loi03$par,u03*exp(loi03$coef[1]*v1[i]+loi03$coef[2]*v2[i])) tk<-c(t01,t02,t03) ## 3 simulated times for the 3 transitions 01 02 03 if(which.min(tk)==1){ ## transition 01, t12<- gene.weibull(loi12$par,u12*exp(loi12$coef[1]*v1[i]+loi12$coef[2]*v2[i])) t13<- gene.weibull(loi13$par,u13*exp(loi13$coef[1]*v1[i]+loi13$coef[2]*v2[i])) rtt<-c(t12,t13) ## 2 simulated times for the transitions 12 13 if(which.min(rtt)==1){ ## then transition 12. bh<-c(i,tk[1],1,v1[i],v2[i],0,0,t12,1,0,centrej) }else{ ## then transition 13. bh<-c(i,tk[1],1,v1[i],v2[i],0,0,t13,0,1,centrej) } database[i,]<-bh } if(which.min(tk)==2){ ## transition 02. bh<-c(i,tk[2],0,v1[i],v2[i],1,0,0,0,0,centrej) database[i,]<-bh } if(which.min(tk)==3){ ## transition 03. bh<-c(i,tk[3],0,v1[i],v2[i],0,1,0,0,0,centrej) database[i,]<-bh } }} return(database) } ### ==== End function Simul.semi.markov =================================### ############################################################################################## ## gene.weibull generates survival time fom a proportional hazard weibull intensity ## X represents the predictor : exp(beta1*v1+beta2*v2) ## para: vector of the parameter of the weibull baseline intensity gene.weibull <- function(para,X){ b <- para[2] a <- para[1] return(b*((-(1/X)*log(1-runif(1)))**(1/a))) } ############################################################################################### ### ======================================= function analysis.multi-state ==================### ## Analysis.multi-state: function which perform a multi-state model ## non-homogeneous markov ("non-homogeneous") or semi-markov model ("semi") ## parametric (Weibull) estimation or penalized likelihood approach (penalized) Analysis.multi.state <- function(data=mydata,model="semi",estimator="penalized"){ n <- dim(data)[1] data1<-data[which(data$PASS01==1),] ## data include observation which have transit from 0 to 1 n.VAP <- dim(data1)[1] if(estimator=="penalized"){ res01 <-frailtyPenal(Surv(TIME0,PASS01)~ as.factor(v1)+v2+cluster(IDCENTRE),data=data,cross.validation=TRUE,n.knots=8,kappa1=1000,Frailty=TRUE,hazard="Splines") res02 <-frailtyPenal(Surv(TIME0,PASS02)~ as.factor(v1)+v2+cluster(IDCENTRE),data=data,cross.validation=TRUE,n.knots=8,kappa1=1000,Frailty=TRUE,hazard="Splines") res03 <-frailtyPenal(Surv(TIME0,PASS03)~ as.factor(v1)+v2+cluster(IDCENTRE),data=data,cross.validation=TRUE,n.knots=8,kappa1=1000,Frailty=TRUE,hazard="Splines") if(model=="semi"){ ### SEMI-MARKOV ### res12 <-frailtyPenal(Surv(TIME1,PASS12)~ as.factor(v1)+v2+cluster(IDCENTRE),data=data1,cross.validation=TRUE,n.knots=8,kappa1=1000,Frailty=TRUE,hazard="Splines") res13 <-frailtyPenal(Surv(TIME1,PASS13)~ as.factor(v1)+v2+cluster(IDCENTRE),data=data1,cross.validation=TRUE,n.knots=8,kappa1=1000,Frailty=TRUE,hazard="Splines") }else{ #### MARKOV NON-HOMOGENEOUS ### res12 <-frailtyPenal(Surv(TIME0,TIME0+TIME1,PASS12)~as.factor(v1)+v2+cluster(IDCENTRE),data=data1,cross.validation=TRUE,n.knots=8,kappa1=1000,Frailty=TRUE,hazard="Splines") res13 <-frailtyPenal(Surv(TIME0,TIME0+TIME1,PASS13)~as.factor(v1)+v2+cluster(IDCENTRE),data=data1,cross.validation=TRUE,n.knots=8,kappa1=1000,Frailty=TRUE,hazard="Splines") } LCV <- ((res01$LCV+res01$LCV+res01$LCV)*n+(res12$LCV+res13$LCV)*n.VAP)/n }else{ res01 <-frailtyPenal(Surv(TIME0,PASS01)~ as.factor(v1)+v2+cluster(IDCENTRE),data=data,Frailty=TRUE,hazard="Weibull") res02 <-frailtyPenal(Surv(TIME0,PASS02)~ as.factor(v1)+v2+cluster(IDCENTRE),data=data,hazard="Weibull") res03 <-frailtyPenal(Surv(TIME0,PASS03)~ as.factor(v1)+v2+cluster(IDCENTRE),data=data,hazard="Weibull") if(model=="semi"){ res12 <-frailtyPenal(Surv(TIME1,PASS12)~ as.factor(v1)+v2+cluster(IDCENTRE),data=data1,Frailty=TRUE,hazard="Weibull") res13 <-frailtyPenal(Surv(TIME1,PASS13)~ as.factor(v1)+v2+cluster(IDCENTRE),data=data1,Frailty=TRUE,hazard="Weibull") }else{ res12 <-frailtyPenal(Surv(TIME0,TIME0+TIME1,PASS12)~as.factor(v1)+v2+cluster(IDCENTRE),data=data1,Frailty=TRUE,hazard="Weibull") res13 <-frailtyPenal(Surv(TIME0,TIME0+TIME1,PASS13)~as.factor(v1)+v2+cluster(IDCENTRE),data=data1,Frailty=TRUE,hazard="Weibull") } LCV <- ((res01$AIC+res01$AIC+res01$AIC)*n+(res12$AIC+res13$AIC)*n.VAP)/n } res <- list(res01=res01,res02=res02,res03=res03,res12=res12,res13=res13,LCV.multi.state=LCV) return(res) } ### ==================End function Analysis.multi-state =======================================================### ################################################################################################################### ## Example on a simulated data set ## We perform the 4 model: semi-Markov (parametric and semi-parametric); ## Markov non-homogeneous (paremetric and semi-parametric ## We present also a table to choose the best model based on the LCV criterion ################################################################################################################### set.seed(133) mydata <- Simul.semi.markov(G=30,ni=150,paragamma=paragamma,p=0.5,loi01,loi02,loi03,loi12,loi13) model.semi.markov.penalized <- Analysis.multi.state(data=mydata,model="semi",estimator="penalized") model.non.homogeneous.penalized <- Analysis.multi.state(data=mydata,model="non-homogeneous",estimator="penalized") model.semi.markov.weibull <- Analysis.multi.state(data=mydata,model="semi",estimator="weibull") model.non.homogeneous.weibull <- Analysis.multi.state(data=mydata,model="non-homogeneous",estimator="weibull") ###table for the choice of the model tab <- matrix(c(model.non.homogeneous.weibull$LCV,model.semi.markov.weibull$LCV,model.non.homogeneous.weibull$LCV-model.semi.markov.weibull$LCV ,model.non.homogeneous.penalized$LCV,model.semi.markov.penalized$LCV ,model.non.homogeneous.penalized$LCV-model.semi.markov.penalized$LCV, model.non.homogeneous.weibull$LCV-model.non.homogeneous.penalized$LCV,model.semi.markov.weibull$LCV-model.semi.markov.penalized$LCV ,NA ),nrow=3,ncol=3,byrow=T) rownames(tab) <- c("parametric","semi-parametric","difference") tab <- as.data.frame(tab) colnames(tab) <- c("Non-homogeneous","Semi-Markov","difference") print(tab)