Analyzing twin survival data with 'mets'
Table of Contents
Installation
Install dependencies (R>=2.15
) :
install.packages(c("mets","cmprsk"), dependencies=TRUE)
OBS: At this point you might have to restart R
to flush the cache
of previously installed versions of the packages. If you have
previously installed timereg
and lava
, make sure that you have the
current versions installed (timereg: 1.8.4, lava: 1.2.6).
Load simulated data
library(mets)
The dataset prt
contains (simulated) observations on prostate cancer
with the following columns
-
country
- Country (Denmark,Finland,Norway,Sweden)
-
time
- exit time (censoring,death or prostate cancer)
-
status
- Status (censoring=0,death=1 or prostate cancer=2)
-
zyg
- Zygosity (DZ,MZ)
-
id
- Twin id number
-
cancer
- cancer indicator (status=2)
data(prt) head(prt)
Status table
prtwide <- fast.reshape(prt,id="id") ftable(status1~status2,prtwide)
Estimation of cumulative incidence
times <- seq(40,100,by=2) cifmod <- comp.risk(Hist(time,status)~+1+cluster(id),data=prt, cause=2,n.sim=0, times=times,conservative=1,max.clust=NULL,model="fg") theta.des <- model.matrix(~-1+factor(zyg),data=prt) ## design for MZ/DZ status or1 <- or.cif(cifmod,data=prt,cause1=2,cause2=2,theta.des=theta.des, score.method="fisher.scoring",same.cens=TRUE) summary(or1) or1$score
pcif <- predict(cifmod,X=1,resample.iid=0,uniform=0,se=0)
plot(pcif,multiple=1,se=0,uniform=0,ylim=c(0,0.15))
Assumes that the censoring of the two twins are independent (when they are the same):
incorrect.or1 <- or.cif(cifmod,data=prt,cause1=2,cause2=2,theta.des=theta.des, theta=c(2.8,8.6),score.method="fisher.scoring") summary(incorrect.or1) ## not bad incorrect.or1$score
Correcting for country
table(prt$country) times <- seq(40,100,by=2) cifmodl <-comp.risk(Hist(time,status)~-1+factor(country)+cluster(id),data=prt, cause=2,n.sim=0,times=times,conservative=1, max.clust=NULL,cens.model="aalen") pcifl <- predict(cifmodl,X=diag(4),se=0,uniform=0) plot(pcifl,multiple=1,se=0,uniform=0,col=1:4,ylim=c(0,0.2)) legend("topleft",levels(prt$country),col=1:4,lty=1)
Design for MZ/DZ status
theta.des <- model.matrix(~-1+factor(zyg),data=prt) or.country <- or.cif(cifmodl,data=prt,cause1=2,cause2=2,theta.des=theta.des, theta=c(0.8,2.1),score.method="fisher.scoring",same.cens=TRUE) summary(or.country)
Concordance estimation
Ignoring country. Computing casewise, using prodlim
. CIF:
outm <- prodlim(Hist(time,status)~+1,data=prt) times <- 60:100 ## cause is 2 (second cause) cifmz <- predict(outm,cause=2,time=times,newdata=data.frame(zyg="MZ")) cifdz <- predict(outm,cause=2,time=times,newdata=data.frame(zyg="DZ"))
### casewise pp33 <- bicomprisk(Hist(time,status)~strata(zyg)+id(id),data=prt,cause=c(2,2),prodlim=TRUE) pp33dz <- pp33$model$"DZ" pp33mz <- pp33$model$"MZ" concdz <- predict(pp33dz,cause=1,time=times,newdata=data.frame(zyg="DZ")) concmz <- predict(pp33mz,cause=1,time=times,newdata=data.frame(zyg="MZ"))
par(mfrow=c(1,2)) plot(times,concdz,ylim=c(0,0.1),type="s") lines(pcif$time,pcif$P1^2,col=2) title(main="DZ Conc. Prostate cancer") plot(times,concmz,ylim=c(0,0.1),type="s") title(main="MZ Conc. Prostate cancer") lines(pcif$time,pcif$P1^2,col=2)
par(mfrow=c(1,1)) cdz <- casewise(pp33dz,outm,cause.marg=2) cmz <- casewise(pp33mz,outm,cause.marg=2) plot(cmz,ci=NULL,ylim=c(0,0.5),xlim=c(60,100),legend=TRUE,col=c(3,2,1)) par(new=TRUE) plot(cdz,ci=NULL,ylim=c(0,0.5),xlim=c(60,100),legend=TRUE)
Similar analyses using comp.risk
for competing risks
leads to tests for equal concordance and more correct standard
errors
p33 <- bicomprisk(Hist(time,status)~strata(zyg)+id(id),data=prt,cause=c(2,2),return.data=1) p33dz <- p33$model$"DZ"$comp.risk p33mz <- p33$model$"MZ"$comp.risk
head(cbind(p33mz$time, p33mz$P1, p33mz$se.P1)) head(cbind(p33dz$time, p33dz$P1, p33dz$se.P1))
Test for genetic effect, needs other form of bicomprisk with iid decomp
conc1 <- p33dz conc2 <- p33mz test.conc(p33dz,p33mz);
OR expression of difference in concordance functions and Gray test
data33mz <- p33$model$"MZ"$data data33mz$zyg <- 1 data33dz <- p33$model$"DZ"$data data33dz$zyg <- 0 data33 <- rbind(data33mz,data33dz) library(cmprsk) ftime <- data33$time fstatus <- data33$status table(fstatus)
group <- data33$zyg graytest <- cuminc(ftime,fstatus,group) graytest
zygeffect <- comp.risk(Hist(time,status)~const(zyg), data=data33,cause=1, cens.model="aalen",model="logistic",conservative=1) summary(zygeffect)
Liability model, ignoring censoring
(M <- with(prt, table(cancer,zyg)))
coef(lm(cancer~-1+zyg,prt))
Saturated model
bpmz <- biprobit(cancer~1 + cluster(id), data=subset(prt,zyg=="MZ"), eqmarg=TRUE) logLik(bpmz) # Log-likelihood AIC(bpmz) # AIC coef(bpmz) # Parameter estimates vcov(bpmz) # Asymptotic covariance summary(bpmz) # concordance, case-wise, tetrachoric correlations, ...
bp0 <- biprobit(cancer~1 + cluster(id)+strata(zyg), data=prt)
summary(bp0)
Equal marginals MZ/DZ
bp1 <- bptwin(cancer~1,zyg="zyg",DZ="DZ",id="id",type="u",data=prt) (s <- summary(bp1))
Components (concordance,cor,…) can be extracted from returned list
s$all
Likelihood Ratio Test
compare(bp0,bp1)
Polygenic Libability model via te bptwin
function (type
can be a
subset of "acde", or "flex" for stratitified, "u" for random effects
model with same marginals for MZ and DZ)
bp2 <- bptwin(cancer~1,zyg="zyg",DZ="DZ",id="id",type="ace",data=prt) summary(bp2)
Liability model, Inverse Probability Weighting
Probability weights based on Aalen's additive model
prtw <- ipw(Surv(time,status==0)~country, data=prt, cluster="id",weightname="w") plot(0,type="n",xlim=range(prtw$time),ylim=c(0,1),xlab="Age",ylab="Probability") count <- 0 for (l in unique(prtw$country)) { count <- count+1 prtw <- prtw[order(prtw$time),] with(subset(prtw,country==l), lines(time,w,col=count,lwd=2)) } legend("topright",legend=unique(prtw$country),col=1:4,pch=-1,lty=1)
bpmzIPW <- biprobit(cancer~1 + cluster(id), data=subset(prtw,zyg=="MZ"), weight="w") (smz <- summary(bpmzIPW))
Comparison with CIF
plot(pcif,multiple=1,se=1,uniform=0,ylim=c(0,0.15)) abline(h=smz$prob["Marginal",],lwd=c(2,1,1)) ## Wrong estimates: abline(h=summary(bpmz)$prob["Marginal",],lwd=c(2,1,1),col="lightgray")
Concordance estimates
plot(pp33mz,ylim=c(0,0.1)) abline(h=smz$prob["Concordance",],lwd=c(2,1,1)) ## Wrong estimates: abline(h=summary(bpmz)$prob["Concordance",],lwd=c(2,1,1),col="lightgray")
ACE model with IPW
bp3 <- bptwin(cancer~1,zyg="zyg",DZ="DZ",id="id", type="ace",data=prtw,weight="w") summary(bp3)
Equal marginals but free variance structure between MZ and DZ
bp4 <- bptwin(cancer~1,zyg="zyg",DZ="DZ",id="id", type="u",data=prtw,weight="w") summary(bp4)
Check convergence
mean(score(bp4)^2)
Liability model, adjusting for covariates
Main effect of country
bp6 <- bptwin(cancer~country,zyg="zyg",DZ="DZ",id="id", type="ace",data=prtw,weight="w") summary(bp6)
bp7 <- bptwin(cancer~country,zyg="zyg",DZ="DZ",id="id", type="u",data=prtw,weight="w") summary(bp7)
Stratified analysis
bp8 <- bptwin(cancer~strata(country),zyg="zyg",DZ="DZ",id="id", type="u",data=prtw,weight="w")
summary(bp8)
Wald test (stratified vs main effect)
B <- contr(3,4)[-(1:3),]
compare(bp8,contrast=B)