Analyzing twin survival data with 'mets'

Table of Contents

Installation

Install dependencies (R>=2.15) :

install.packages(c("mets","cmprsk"), dependencies=TRUE)

OBS: At this point you might have to restart R to flush the cache of previously installed versions of the packages. If you have previously installed timereg and lava, make sure that you have the current versions installed (timereg: 1.8.4, lava: 1.2.6).

Load simulated data

library(mets)

The dataset prt contains (simulated) observations on prostate cancer with the following columns

country
Country (Denmark,Finland,Norway,Sweden)
time
exit time (censoring,death or prostate cancer)
status
Status (censoring=0,death=1 or prostate cancer=2)
zyg
Zygosity (DZ,MZ)
id
Twin id number
cancer
cancer indicator (status=2)
data(prt)
head(prt)

Status table

prtwide <- fast.reshape(prt,id="id")
ftable(status1~status2,prtwide)

Estimation of cumulative incidence

times <- seq(40,100,by=2)
cifmod <- comp.risk(Hist(time,status)~+1+cluster(id),data=prt,
                    cause=2,n.sim=0,
                    times=times,conservative=1,max.clust=NULL,model="fg")

theta.des <- model.matrix(~-1+factor(zyg),data=prt) ## design for MZ/DZ status
or1 <- or.cif(cifmod,data=prt,cause1=2,cause2=2,theta.des=theta.des,
              score.method="fisher.scoring",same.cens=TRUE)
summary(or1)
or1$score
pcif <- predict(cifmod,X=1,resample.iid=0,uniform=0,se=0)
plot(pcif,multiple=1,se=0,uniform=0,ylim=c(0,0.15))

Assumes that the censoring of the two twins are independent (when they are the same):

incorrect.or1 <- or.cif(cifmod,data=prt,cause1=2,cause2=2,theta.des=theta.des, 
                        theta=c(2.8,8.6),score.method="fisher.scoring")
summary(incorrect.or1)
## not  bad
incorrect.or1$score

Correcting for country

table(prt$country)

times <- seq(40,100,by=2)
cifmodl <-comp.risk(Hist(time,status)~-1+factor(country)+cluster(id),data=prt,
                    cause=2,n.sim=0,times=times,conservative=1,
                    max.clust=NULL,cens.model="aalen")
pcifl <- predict(cifmodl,X=diag(4),se=0,uniform=0)
plot(pcifl,multiple=1,se=0,uniform=0,col=1:4,ylim=c(0,0.2))
legend("topleft",levels(prt$country),col=1:4,lty=1)

Design for MZ/DZ status

theta.des <- model.matrix(~-1+factor(zyg),data=prt) 
or.country <- or.cif(cifmodl,data=prt,cause1=2,cause2=2,theta.des=theta.des,
                     theta=c(0.8,2.1),score.method="fisher.scoring",same.cens=TRUE)

summary(or.country)

Concordance estimation

Ignoring country. Computing casewise, using prodlim. CIF:

outm <- prodlim(Hist(time,status)~+1,data=prt)

times <- 60:100
## cause is 2 (second cause)
cifmz <- predict(outm,cause=2,time=times,newdata=data.frame(zyg="MZ"))
cifdz <- predict(outm,cause=2,time=times,newdata=data.frame(zyg="DZ"))
### casewise 
pp33 <- bicomprisk(Hist(time,status)~strata(zyg)+id(id),data=prt,cause=c(2,2),prodlim=TRUE)
pp33dz <- pp33$model$"DZ"
pp33mz <- pp33$model$"MZ"
concdz <- predict(pp33dz,cause=1,time=times,newdata=data.frame(zyg="DZ"))
concmz <- predict(pp33mz,cause=1,time=times,newdata=data.frame(zyg="MZ"))
par(mfrow=c(1,2))
plot(times,concdz,ylim=c(0,0.1),type="s")
lines(pcif$time,pcif$P1^2,col=2)
title(main="DZ Conc. Prostate cancer")
plot(times,concmz,ylim=c(0,0.1),type="s")
title(main="MZ Conc. Prostate cancer")
lines(pcif$time,pcif$P1^2,col=2)
par(mfrow=c(1,1))
cdz <- casewise(pp33dz,outm,cause.marg=2)
cmz <- casewise(pp33mz,outm,cause.marg=2)             
plot(cmz,ci=NULL,ylim=c(0,0.5),xlim=c(60,100),legend=TRUE,col=c(3,2,1))
par(new=TRUE)
plot(cdz,ci=NULL,ylim=c(0,0.5),xlim=c(60,100),legend=TRUE)

Similar analyses using comp.risk for competing risks leads to tests for equal concordance and more correct standard errors

p33 <- bicomprisk(Hist(time,status)~strata(zyg)+id(id),data=prt,cause=c(2,2),return.data=1)

p33dz <- p33$model$"DZ"$comp.risk
p33mz <- p33$model$"MZ"$comp.risk
head(cbind(p33mz$time, p33mz$P1, p33mz$se.P1))
head(cbind(p33dz$time, p33dz$P1, p33dz$se.P1))

Test for genetic effect, needs other form of bicomprisk with iid decomp

conc1 <- p33dz
conc2 <- p33mz

test.conc(p33dz,p33mz);

OR expression of difference in concordance functions and Gray test

data33mz <- p33$model$"MZ"$data
data33mz$zyg <- 1
data33dz <- p33$model$"DZ"$data
data33dz$zyg <- 0
data33 <- rbind(data33mz,data33dz)

library(cmprsk)
ftime <- data33$time
fstatus <- data33$status
table(fstatus)
group <- data33$zyg
graytest <- cuminc(ftime,fstatus,group)
graytest
zygeffect <- comp.risk(Hist(time,status)~const(zyg),
                  data=data33,cause=1,
                  cens.model="aalen",model="logistic",conservative=1)
summary(zygeffect)

Liability model, ignoring censoring

(M <- with(prt, table(cancer,zyg)))
coef(lm(cancer~-1+zyg,prt))

Saturated model

bpmz <- biprobit(cancer~1 + cluster(id), 
             data=subset(prt,zyg=="MZ"), eqmarg=TRUE)

logLik(bpmz) # Log-likelihood
AIC(bpmz) # AIC
coef(bpmz) # Parameter estimates
vcov(bpmz) # Asymptotic covariance
summary(bpmz) # concordance, case-wise, tetrachoric correlations, ...
bp0 <- biprobit(cancer~1 + cluster(id)+strata(zyg), data=prt)
summary(bp0)

Equal marginals MZ/DZ

bp1 <- bptwin(cancer~1,zyg="zyg",DZ="DZ",id="id",type="u",data=prt)
(s <- summary(bp1))

Components (concordance,cor,…) can be extracted from returned list

s$all

Likelihood Ratio Test

compare(bp0,bp1)

Polygenic Libability model via te bptwin function (type can be a subset of "acde", or "flex" for stratitified, "u" for random effects model with same marginals for MZ and DZ)

bp2 <- bptwin(cancer~1,zyg="zyg",DZ="DZ",id="id",type="ace",data=prt)
summary(bp2)

Liability model, Inverse Probability Weighting

Probability weights based on Aalen's additive model

prtw <- ipw(Surv(time,status==0)~country, data=prt,
            cluster="id",weightname="w") 
plot(0,type="n",xlim=range(prtw$time),ylim=c(0,1),xlab="Age",ylab="Probability")
count <- 0
for (l in unique(prtw$country)) {
    count <- count+1
    prtw <- prtw[order(prtw$time),]
    with(subset(prtw,country==l), 
         lines(time,w,col=count,lwd=2))
}
legend("topright",legend=unique(prtw$country),col=1:4,pch=-1,lty=1)
bpmzIPW <- biprobit(cancer~1 + cluster(id), 
                    data=subset(prtw,zyg=="MZ"), 
                    weight="w")
(smz <- summary(bpmzIPW))

Comparison with CIF

plot(pcif,multiple=1,se=1,uniform=0,ylim=c(0,0.15))
abline(h=smz$prob["Marginal",],lwd=c(2,1,1))
## Wrong estimates:
abline(h=summary(bpmz)$prob["Marginal",],lwd=c(2,1,1),col="lightgray")

Concordance estimates

plot(pp33mz,ylim=c(0,0.1))
abline(h=smz$prob["Concordance",],lwd=c(2,1,1))
## Wrong estimates:
abline(h=summary(bpmz)$prob["Concordance",],lwd=c(2,1,1),col="lightgray")

ACE model with IPW

bp3 <- bptwin(cancer~1,zyg="zyg",DZ="DZ",id="id",
              type="ace",data=prtw,weight="w")
summary(bp3)

Equal marginals but free variance structure between MZ and DZ

bp4 <- bptwin(cancer~1,zyg="zyg",DZ="DZ",id="id",
              type="u",data=prtw,weight="w")
summary(bp4)

Check convergence

mean(score(bp4)^2)

Liability model, adjusting for covariates

Main effect of country

bp6 <- bptwin(cancer~country,zyg="zyg",DZ="DZ",id="id",
              type="ace",data=prtw,weight="w")
summary(bp6)
bp7 <- bptwin(cancer~country,zyg="zyg",DZ="DZ",id="id",
              type="u",data=prtw,weight="w")
summary(bp7)

Stratified analysis

bp8 <- bptwin(cancer~strata(country),zyg="zyg",DZ="DZ",id="id",
              type="u",data=prtw,weight="w")
summary(bp8)

Wald test (stratified vs main effect)

B <- contr(3,4)[-(1:3),]
compare(bp8,contrast=B)

Created: 2014-05-09 Fri 12:12