library(gsDesign)
library(gsDesign2)
library(dplyr)
library(gt)
library(simtrial)
library(tidyr)
library(survival)
In the survival (base R) package, the log-rank and Cox estimation
procedures apply (by default) a correction to “fix” roundoff errors.
These are implemented with the timefix
option (by default
timefix = TRUE
) via the aeqSurv()
function.
However, in the simtrial package, (and also Hmisc), such a correction is
not implemented; Consequently, there can be discrepancies between
simtrial and base R survival (survdiff()
,
coxph()
, and survfit()
).
For details on the aeqSurv()
function, see Therneau,
2016 and the ?aeqSurv
function documentation.
In the following, we describe a simulation scenario where a
discrepancy is generated and illustrate how discrepancies can be
resolved (if desired) by pre-processing survival times with
aeqSurv()
and thus replicating survdiff()
and
coxph()
default calculations.
In the simulated dataset, two observations are generated:
aeqSurv()
, these times are tied and set to \(Y=0.306132604679\).We define various true data generating model scenarios and convert
for use in gsDesign2. Here, we are using a single scenario where
discrepancies were found. This is just for illustration to inform the
user of simtrial that discrepancies can occur and how to resolve via
aeqSurv()
, if desired.
survival_at_24_months <- 0.35
hr <- log(.35) / log(.25)
control_median <- 12
control_rate <- c(log(2) / control_median, (log(.25) - log(.2)) / 12)
scenarios <- tribble(
~Scenario, ~Name, ~Period, ~duration, ~Survival,
0, "Control", 0, 0, 1,
0, "Control", 1, 24, .25,
0, "Control", 2, 12, .2,
1, "PH", 0, 0, 1,
1, "PH", 1, 24, .35,
1, "PH", 2, 12, .2^hr,
2, "3-month delay", 0, 0, 1,
2, "3-month delay", 1, 3, exp(-3 * control_rate[1]),
2, "3-month delay", 2, 21, .35,
2, "3-month delay", 3, 12, .2^hr,
3, "6-month delay", 0, 0, 1,
3, "6-month delay", 1, 6, exp(-6 * control_rate[1]),
3, "6-month delay", 2, 18, .35,
3, "6-month delay", 3, 12, .2^hr,
4, "Crossing", 0, 0, 1,
4, "Crossing", 1, 3, exp(-3 * control_rate[1] * 1.3),
4, "Crossing", 2, 21, .35,
4, "Crossing", 3, 12, .2^hr,
5, "Weak null", 0, 0, 1,
5, "Weak null", 1, 24, .25,
5, "Weak null", 2, 12, .2,
6, "Strong null", 0, 0, 1,
6, "Strong null", 1, 3, exp(-3 * control_rate[1] * 1.5),
6, "Strong null", 2, 3, exp(-6 * control_rate[1]),
6, "Strong null", 3, 18, .25,
6, "Strong null", 4, 12, .2,
)
# scenarios |> gt()
fr <- scenarios |>
group_by(Scenario) |>
# filter(Scenario == 2) |>
mutate(
Month = cumsum(duration),
x_rate = -(log(Survival) - log(lag(Survival, default = 1))) /
duration,
rate = ifelse(Month > 24, control_rate[2], control_rate[1]),
hr = x_rate / rate
) |>
select(-x_rate) |>
filter(Period > 0, Scenario > 0) |>
ungroup()
# fr |> gt() |> fmt_number(columns = everything(), decimals = 2)
fr <- fr |> mutate(fail_rate = rate, dropout_rate = 0.001, stratum = "All")
# MWLR
mwlr <- fixed_design_mb(
tau = 12,
enroll_rate = define_enroll_rate(duration = 12, rate = 1),
fail_rate = fr |> filter(Scenario == 2),
alpha = 0.025, power = .85, ratio = 1,
study_duration = 36
) |> to_integer()
er <- mwlr$enroll_rate
set.seed(3219)
dgm <- fr[c(14:17), ]
fail_rate <- data.frame(
stratum = rep("All", 2 * nrow(dgm)),
period = rep(dgm$Period, 2),
treatment = c(
rep("control", nrow(dgm)),
rep("experimental", nrow(dgm))
),
duration = rep(dgm$duration, 2),
rate = c(dgm$rate, dgm$rate * dgm$hr)
)
dgm$stratum <- "All"
# Constant dropout rate for both treatment arms and all scenarios
dropout_rate <- data.frame(
stratum = rep("All", 2),
period = rep(1, 2),
treatment = c("control", "experimental"),
duration = rep(100, 2),
rate = rep(.001, 2)
)
Simulated dataset with discrepancy between logrank test of
simtrial::wlr()
and survdiff()
(also compare
to score test of coxph()
[same as survdiff()
with default timefix = TRUE
]).
ss <- 395
set.seed(8316951 + ss * 1000)
# Generate a dataset
dat <- sim_pw_surv(
n = 698,
enroll_rate = er,
fail_rate = fail_rate,
dropout_rate = dropout_rate
)
analysis_data <- cut_data_by_date(dat, 36)
dfa <- analysis_data
dfa$treat <- ifelse(dfa$treatment == "experimental", 1, 0)
z1 <- dfa |> wlr(weight = fh(rho = 0, gamma = 0))
check <- survdiff(Surv(tte, event) ~ treat, data = dfa)
# Note, for `coxph()`, use
# cph.score <- summary(coxph(Surv(tte, event) ~ treat, data = dfa, control = coxph.control(timefix = TRUE)))$sctest
cat("Log-rank wlr() vs survdiff()", c(z1$z^2, check$chisq), "\n")
## Log-rank wlr() vs survdiff() 0.1577428 0.1577954
Verify that timefix = FALSE
in coxph()
agrees with wlr()
:
cph.score <- summary(coxph(
Surv(tte, event) ~ treat,
data = dfa,
control = coxph.control(timefix = FALSE)
))$sctest
cat("Log-rank wlr() vs Cox score z^2", c(z1$z^2, cph.score["test"]), "\n")
## Log-rank wlr() vs Cox score z^2 0.1577428 0.1577428
Pre-processing survival times with aeqSurv()
to
implement timefix = TRUE
procedure.
Verify wlr()
and survdiff()
now agree.
Y <- dfa[, "tte"]
Delta <- dfa[, "event"]
tfixed <- aeqSurv(Surv(Y, Delta))
Y <- tfixed[, "time"]
Delta <- tfixed[, "status"]
# Use aeqSurv version
dfa$tte2 <- Y
dfa$event2 <- Delta
# wlr() after "timefix"
dfa2 <- dfa
dfa2$tte <- dfa2$tte2
dfa2$event <- dfa2$event2
z1new <- dfa2 |> wlr(weight = fh(rho = 0, gamma = 0))
cat("Log-rank wlr() with timefix vs survdiff() z^2", c(z1new$z^2, check$chisq), "\n")
## Log-rank wlr() with timefix vs survdiff() z^2 0.1577954 0.1577954
Where do they differ (tte2
are times after
aeqSurv()
)?
dfa <- dfa[order(dfa$tte2), ]
id <- seq(1, nrow(dfa))
diff <- exp(dfa$tte) - exp(dfa$tte2)
id_diff <- which(abs(diff) > 0)
tolook <- seq(id_diff - 2, id_diff + 2)
dfcheck <- dfa[tolook, c("tte", "tte2", "event", "event2", "treatment")]
print(dfcheck, digits = 12)
## tte tte2 event event2 treatment
## 13 0.276251560170 0.276251560170 1 1 experimental
## 143 0.298789385712 0.298789385712 1 1 control
## 464 0.306132722582 0.306132604679 1 1 control
## 516 0.306132604679 0.306132604679 1 1 experimental
## 605 0.336489970678 0.336489970678 1 1 experimental
Verify coxph()
(default) and coxph()
with
aeqSurv()
pre-processing (using tte2
as
outcome and setting timefix = FALSE
) are identical:
Also note that here ties do not have impact because in separate arms.
# Check Cox with ties
cox_breslow <- summary(coxph(Surv(tte, event) ~ treatment, data = dfa, ties = "breslow"))$conf.int
cox_efron <- summary(coxph(Surv(tte, event) ~ treatment, data = dfa, ties = "efron"))$conf.int
cat("Cox Breslow and Efron hr (tte, timefix=TRUE):", c(cox_breslow[1], cox_efron[1]), "\n")
## Cox Breslow and Efron hr (tte, timefix=TRUE): 0.9657106 0.9657106
# Here ties do not have impact because in separate arms
cox_breslow <- summary(coxph(Surv(tte2, event2) ~ treatment, data = dfa, ties = "breslow", control = coxph.control(timefix = FALSE)))$conf.int
cox_efron <- summary(coxph(Surv(tte2, event2) ~ treatment, data = dfa, ties = "efron", control = coxph.control(timefix = FALSE)))$conf.int
cat("Cox Breslow and Efron hr (tte2, timefix=FALSE):", c(cox_breslow[1], cox_efron[1]), "\n")
## Cox Breslow and Efron hr (tte2, timefix=FALSE): 0.9657106 0.9657106
So here there is a difference between tte
and
tte2
times, but there is not an impact of ties for Cox
between "breslow"
and "efron"
because the ties
(single tie in tte2
) are in separate arms.
Lastly, artificially change treatment so that two observations are
tied within the same treatment arm which generates difference between
"breslow"
and "efron"
options for
ties
:
# Create tie within treatment arm by changing treatment
dfa3 <- dfa
dfa3[19, "treat"] <- 1.0
cox_breslow <- summary(coxph(Surv(tte, event) ~ treat, data = dfa3, ties = "breslow", control = coxph.control(timefix = TRUE)))$conf.int
cox_efron <- summary(coxph(Surv(tte, event) ~ treat, data = dfa3, ties = "efron", control = coxph.control(timefix = TRUE)))$conf.int
cat("Cox Breslow and Efron hr (tte, timefix=TRUE)=", c(cox_breslow[1], cox_efron[1]), "\n")
## Cox Breslow and Efron hr (tte, timefix=TRUE)= 0.9729723 0.9729778
Same as
cox_breslow <- summary(coxph(Surv(tte2, event2) ~ treat, data = dfa3, ties = "breslow", control = coxph.control(timefix = FALSE)))$conf.int
cox_efron <- summary(coxph(Surv(tte2, event2) ~ treat, data = dfa3, ties = "efron", control = coxph.control(timefix = FALSE)))$conf.int
cat("Cox Breslow and Efron hr (tte2, timefix=FALSE)=", c(cox_breslow[1], cox_efron[1]), "\n")
## Cox Breslow and Efron hr (tte2, timefix=FALSE)= 0.9729723 0.9729778