Note on potential discrepancies between simtrial and survdiff

Overview

library(gsDesign)
library(gsDesign2)
library(dplyr)
library(gt)
library(simtrial)
library(tidyr)
library(survival)

In the survival (base R) package, the log-rank and Cox estimation procedures apply (by default) a correction to “fix” roundoff errors. These are implemented with the timefix option (by default timefix = TRUE) via the aeqSurv() function. However, in the simtrial package, (and also Hmisc), such a correction is not implemented; Consequently, there can be discrepancies between simtrial and base R survival (survdiff(), coxph(), and survfit()).

For details on the aeqSurv() function, see Therneau, 2016 and the ?aeqSurv function documentation.

In the following, we describe a simulation scenario where a discrepancy is generated and illustrate how discrepancies can be resolved (if desired) by pre-processing survival times with aeqSurv() and thus replicating survdiff() and coxph() default calculations.

In the simulated dataset, two observations are generated:

Observation \(i=464\) with survival time \(Y=0.306132722582\).
Observation \(i=516\) with survival time \(Y=0.306132604679\).
Per aeqSurv(), these times are tied and set to \(Y=0.306132604679\).
The log-rank and Cox estimates can therefore differ between other approaches without the “timefix” correction.

Scenario definitions

We define various true data generating model scenarios and convert for use in gsDesign2. Here, we are using a single scenario where discrepancies were found. This is just for illustration to inform the user of simtrial that discrepancies can occur and how to resolve via aeqSurv(), if desired.

survival_at_24_months <- 0.35
hr <- log(.35) / log(.25)
control_median <- 12
control_rate <- c(log(2) / control_median, (log(.25) - log(.2)) / 12)

scenarios <- tribble(
  ~Scenario, ~Name,           ~Period, ~duration, ~Survival,
  0,         "Control",       0,       0,         1,
  0,         "Control",       1,       24,        .25,
  0,         "Control",       2,       12,        .2,
  1,         "PH",            0,       0,         1,
  1,         "PH",            1,       24,        .35,
  1,         "PH",            2,       12,        .2^hr,
  2,         "3-month delay", 0,       0,         1,
  2,         "3-month delay", 1,       3,         exp(-3 * control_rate[1]),
  2,         "3-month delay", 2,       21,        .35,
  2,         "3-month delay", 3,       12,        .2^hr,
  3,         "6-month delay", 0,       0,         1,
  3,         "6-month delay", 1,       6,         exp(-6 * control_rate[1]),
  3,         "6-month delay", 2,       18,        .35,
  3,         "6-month delay", 3,       12,        .2^hr,
  4,         "Crossing",      0,       0,         1,
  4,         "Crossing",      1,       3,         exp(-3 * control_rate[1] * 1.3),
  4,         "Crossing",      2,       21,        .35,
  4,         "Crossing",      3,       12,        .2^hr,
  5,         "Weak null",     0,       0,         1,
  5,         "Weak null",     1,       24,        .25,
  5,         "Weak null",     2,       12,        .2,
  6,         "Strong null",   0,       0,         1,
  6,         "Strong null",   1,       3,         exp(-3 * control_rate[1] * 1.5),
  6,         "Strong null",   2,       3,         exp(-6 * control_rate[1]),
  6,         "Strong null",   3,       18,        .25,
  6,         "Strong null",   4,       12,        .2,
)
# scenarios |> gt()

fr <- scenarios |>
  group_by(Scenario) |>
  #  filter(Scenario == 2) |>
  mutate(
    Month = cumsum(duration),
    x_rate = -(log(Survival) - log(lag(Survival, default = 1))) /
      duration,
    rate = ifelse(Month > 24, control_rate[2], control_rate[1]),
    hr = x_rate / rate
  ) |>
  select(-x_rate) |>
  filter(Period > 0, Scenario > 0) |>
  ungroup()
# fr |> gt() |> fmt_number(columns = everything(), decimals = 2)

fr <- fr |> mutate(fail_rate = rate, dropout_rate = 0.001, stratum = "All")

# MWLR
mwlr <- fixed_design_mb(
  tau = 12,
  enroll_rate = define_enroll_rate(duration = 12, rate = 1),
  fail_rate = fr |> filter(Scenario == 2),
  alpha = 0.025, power = .85, ratio = 1,
  study_duration = 36
) |> to_integer()

er <- mwlr$enroll_rate

A scenario that generates a discrepancy

set.seed(3219)

dgm <- fr[c(14:17), ]

fail_rate <- data.frame(
  stratum = rep("All", 2 * nrow(dgm)),
  period = rep(dgm$Period, 2),
  treatment = c(
    rep("control", nrow(dgm)),
    rep("experimental", nrow(dgm))
  ),
  duration = rep(dgm$duration, 2),
  rate = c(dgm$rate, dgm$rate * dgm$hr)
)

dgm$stratum <- "All"
# Constant dropout rate for both treatment arms and all scenarios
dropout_rate <- data.frame(
  stratum = rep("All", 2),
  period = rep(1, 2),
  treatment = c("control", "experimental"),
  duration = rep(100, 2),
  rate = rep(.001, 2)
)

Simulated dataset with discrepancy between logrank test of simtrial::wlr() and survdiff() (also compare to score test of coxph() [same as survdiff() with default timefix = TRUE]).

ss <- 395

set.seed(8316951 + ss * 1000)

# Generate a dataset
dat <- sim_pw_surv(
  n = 698,
  enroll_rate = er,
  fail_rate = fail_rate,
  dropout_rate = dropout_rate
)

analysis_data <- cut_data_by_date(dat, 36)

dfa <- analysis_data

dfa$treat <- ifelse(dfa$treatment == "experimental", 1, 0)

z1 <- dfa |> wlr(weight = fh(rho = 0, gamma = 0))

check <- survdiff(Surv(tte, event) ~ treat, data = dfa)

# Note, for `coxph()`, use
# cph.score <- summary(coxph(Surv(tte, event) ~ treat, data = dfa, control = coxph.control(timefix = TRUE)))$sctest

cat("Log-rank wlr() vs survdiff()", c(z1$z^2, check$chisq), "\n")

## Log-rank wlr() vs survdiff() 0.1577428 0.1577954

Verify that timefix = FALSE in coxph() agrees with wlr():

cph.score <- summary(coxph(
  Surv(tte, event) ~ treat,
  data = dfa,
  control = coxph.control(timefix = FALSE)
))$sctest
cat("Log-rank wlr() vs Cox score z^2", c(z1$z^2, cph.score["test"]), "\n")

## Log-rank wlr() vs Cox score z^2 0.1577428 0.1577428

Pre-processing survival times with aeqSurv() to implement timefix = TRUE procedure.

Verify wlr() and survdiff() now agree.

Y <- dfa[, "tte"]
Delta <- dfa[, "event"]

tfixed <- aeqSurv(Surv(Y, Delta))
Y <- tfixed[, "time"]
Delta <- tfixed[, "status"]
# Use aeqSurv version
dfa$tte2 <- Y
dfa$event2 <- Delta

# wlr() after "timefix"
dfa2 <- dfa
dfa2$tte <- dfa2$tte2
dfa2$event <- dfa2$event2
z1new <- dfa2 |> wlr(weight = fh(rho = 0, gamma = 0))
cat("Log-rank wlr() with timefix vs survdiff() z^2", c(z1new$z^2, check$chisq), "\n")

## Log-rank wlr() with timefix vs survdiff() z^2 0.1577954 0.1577954

Where do they differ (tte2 are times after aeqSurv())?

dfa <- dfa[order(dfa$tte2), ]

id <- seq(1, nrow(dfa))

diff <- exp(dfa$tte) - exp(dfa$tte2)
id_diff <- which(abs(diff) > 0)

tolook <- seq(id_diff - 2, id_diff + 2)

dfcheck <- dfa[tolook, c("tte", "tte2", "event", "event2", "treatment")]
print(dfcheck, digits = 12)

##                tte           tte2 event event2    treatment
## 13  0.276251560170 0.276251560170     1      1 experimental
## 143 0.298789385712 0.298789385712     1      1      control
## 464 0.306132722582 0.306132604679     1      1      control
## 516 0.306132604679 0.306132604679     1      1 experimental
## 605 0.336489970678 0.336489970678     1      1 experimental

Verify coxph() (default) and coxph() with aeqSurv() pre-processing (using tte2 as outcome and setting timefix = FALSE) are identical:

Also note that here ties do not have impact because in separate arms.

# Check Cox with ties
cox_breslow <- summary(coxph(Surv(tte, event) ~ treatment, data = dfa, ties = "breslow"))$conf.int
cox_efron <- summary(coxph(Surv(tte, event) ~ treatment, data = dfa, ties = "efron"))$conf.int
cat("Cox Breslow and Efron hr (tte, timefix=TRUE):", c(cox_breslow[1], cox_efron[1]), "\n")

## Cox Breslow and Efron hr (tte, timefix=TRUE): 0.9657106 0.9657106

# Here ties do not have impact because in separate arms
cox_breslow <- summary(coxph(Surv(tte2, event2) ~ treatment, data = dfa, ties = "breslow", control = coxph.control(timefix = FALSE)))$conf.int
cox_efron <- summary(coxph(Surv(tte2, event2) ~ treatment, data = dfa, ties = "efron", control = coxph.control(timefix = FALSE)))$conf.int
cat("Cox Breslow and Efron hr (tte2, timefix=FALSE):", c(cox_breslow[1], cox_efron[1]), "\n")

## Cox Breslow and Efron hr (tte2, timefix=FALSE): 0.9657106 0.9657106

So here there is a difference between tte and tte2 times, but there is not an impact of ties for Cox between "breslow" and "efron" because the ties (single tie in tte2) are in separate arms.

Lastly, artificially change treatment so that two observations are tied within the same treatment arm which generates difference between "breslow" and "efron" options for ties:

# Create tie within treatment arm by changing treatment
dfa3 <- dfa
dfa3[19, "treat"] <- 1.0

cox_breslow <- summary(coxph(Surv(tte, event) ~ treat, data = dfa3, ties = "breslow", control = coxph.control(timefix = TRUE)))$conf.int
cox_efron <- summary(coxph(Surv(tte, event) ~ treat, data = dfa3, ties = "efron", control = coxph.control(timefix = TRUE)))$conf.int
cat("Cox Breslow and Efron hr (tte, timefix=TRUE)=", c(cox_breslow[1], cox_efron[1]), "\n")

## Cox Breslow and Efron hr (tte, timefix=TRUE)= 0.9729723 0.9729778

Same as

cox_breslow <- summary(coxph(Surv(tte2, event2) ~ treat, data = dfa3, ties = "breslow", control = coxph.control(timefix = FALSE)))$conf.int
cox_efron <- summary(coxph(Surv(tte2, event2) ~ treat, data = dfa3, ties = "efron", control = coxph.control(timefix = FALSE)))$conf.int
cat("Cox Breslow and Efron hr (tte2, timefix=FALSE)=", c(cox_breslow[1], cox_efron[1]), "\n")

## Cox Breslow and Efron hr (tte2, timefix=FALSE)= 0.9729723 0.9729778

Note on potential discrepancies between simtrial and survdiff

Larry Leon

Overview

Scenario definitions

A scenario that generates a discrepancy