## Load packages
library(biostat3)
library(dplyr) # for data manipulation
library(splines) # ns (recommended package)
Lab Review For Day 3
Exercise 9. Modelling cause-specific mortality using Cox regression
## Data manipulation
<- biostat3::melanoma %>%
melanoma.l filter(stage == "Localised") %>%
mutate(death_cancer = as.numeric(status == "Dead: cancer"))
<- mutate(melanoma.l,
melanoma.l2 ## Create a death indicator (only count deaths within 120 months)
death_cancer = death_cancer * as.numeric(surv_mm <= 120),
## Create a new time variable
surv_mm = pmin(surv_mm, 120))
(a)
<- coxph(Surv(surv_mm, death_cancer) ~ year8594, data = melanoma.l2)
coxfit9a tidy(coxfit9a)
# A tibble: 1 x 7
term estimate std.error statistic p.value conf.low conf.high
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 year8594Diagnosed 85-~ 0.776 0.0658 -3.84 1.21e-4 0.683 0.883
Mortality due to skin melanoma has decreased by 22% in the latter period compared to the earlier period.
The regression equation is
\[ h(t|\text{year8594})=h_0(t)\exp(β_1I(\text{year8594=``Diagnosed 85-94"})) \]
Plot the hazard rates
par(mfrow = c(1,2))
plot(muhaz2(Surv(surv_mm/12, death_cancer) ~ year8594, data = melanoma.l2),
xlab = "Years since diagnosis", col = c("blue","red"), lty = 1:2)
plot(muhaz2(Surv(surv_mm/12, death_cancer) ~ year8594, data = melanoma.l2),
xlab = "Years since diagnosis", col = c("blue","red"), lty = 1:2, log = "y")
(b)
## Log-rank test
survdiff(Surv(surv_mm, death_cancer) ~ year8594, data = melanoma.l2)
Call:
survdiff(formula = Surv(surv_mm, death_cancer) ~ year8594, data = melanoma.l2)
N Observed Expected (O-E)^2/E (O-E)^2/V
year8594=Diagnosed 75-84 2145 519 460 7.44 14.9
year8594=Diagnosed 85-94 3173 441 500 6.86 14.9
Chisq= 14.9 on 1 degrees of freedom, p= 1e-04
## Wald test & Likelihood ratio test
summary( coxfit9b <- coxph(Surv(surv_mm, death_cancer) ~ year8594, data = melanoma.l2) )
Call:
coxph(formula = Surv(surv_mm, death_cancer) ~ year8594, data = melanoma.l2)
n= 5318, number of events= 960
coef exp(coef) se(coef) z Pr(>|z|)
year8594Diagnosed 85-94 -0.25297 0.77649 0.06579 -3.845 0.000121 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
year8594Diagnosed 85-94 0.7765 1.288 0.6825 0.8834
Concordance= 0.533 (se = 0.008 )
Likelihood ratio test= 14.83 on 1 df, p=1e-04
Wald test = 14.78 on 1 df, p=1e-04
Score (logrank) test = 14.86 on 1 df, p=1e-04
(c)
<- coxph(Surv(surv_mm, death_cancer) ~ year8594 + sex + agegrp, data = melanoma.l2)
coxfit9c tidy(coxfit9c)
# A tibble: 5 x 7
term estimate std.error statistic p.value conf.low conf.high
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 year8594Diagnosed 85~ 0.716 0.0662 -5.04 4.72e- 7 0.629 0.816
2 sexFemale 0.588 0.0655 -8.11 5.19e-16 0.517 0.669
3 agegrp45-59 1.33 0.0942 3.00 2.67e- 3 1.10 1.60
4 agegrp60-74 1.86 0.0909 6.82 8.90e-12 1.56 2.22
5 agegrp75+ 3.38 0.104 11.7 1.97e-31 2.75 4.15
The regression equation is
\[\begin{aligned} h(t|\text{year8594,sex,agegrp}) & = h_0(t)\exp\bigg(β_1I(\text{year8594=``Diagnosed 85-94"})+β_2I(\text{sex=``Female"}) \\ & + β_3I(\text{agegrp=``45-59"})+β_4I(\text{agegrp=``60-74"})+β_5I(\text{agegrp=``75+"})\bigg) \end{aligned}\]## Test if the effect of age is significant
## Use a Wald test with the car package
library(car)
linearHypothesis(coxfit9c, c("agegrp45-59 = 0", "agegrp60-74 = 0", "agegrp75+ = 0"))
Linear hypothesis test
Hypothesis:
agegrp45 - 59 = 0
agegrp60 - 74 = 0
agegrp75 + = 0
Model 1: restricted model
Model 2: Surv(surv_mm, death_cancer) ~ year8594 + sex + agegrp
Res.Df Df Chisq Pr(>Chisq)
1 5316
2 5313 3 154.38 < 2.2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(d)
## Test if the effect of age is significant
## Use an LR test with the anova function
## Fit a model without agegrp
<- coxph(Surv(surv_mm, death_cancer) ~ year8594 + sex, data = melanoma.l2)
coxfit9d
## LR test
anova(coxfit9c, coxfit9d)
Analysis of Deviance Table
Cox model: response is Surv(surv_mm, death_cancer)
Model 1: ~ year8594 + sex + agegrp
Model 2: ~ year8594 + sex
loglik Chisq Df P(>|Chi|)
1 -7792.7
2 -7864.4 143.37 3 < 2.2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(coxfit9c)
Analysis of Deviance Table
Cox model: response is Surv(surv_mm, death_cancer)
Terms added sequentially (first to last)
loglik Chisq Df Pr(>|Chi|)
NULL -7899.0
year8594 -7891.6 14.829 1 0.0001177 ***
sex -7864.4 54.495 1 1.559e-13 ***
agegrp -7792.7 143.365 3 < 2.2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(e)
### Cox regression model
<- coxph(Surv(surv_mm, death_cancer) ~ year8594 + sex + agegrp,
coxfit9e data = melanoma.l2)
### Poisson regression model
## Split follow up by year
<- survSplit(melanoma.l2, cut = 12*(0:10), end = "surv_mm", start = "start",
melanoma.spl event = "death_cancer")
## Calculate person-time and recode start time as a factor
<- mutate(melanoma.spl,
melanoma.spl pt = surv_mm - start,
fu = as.factor(start))
## Run Poisson regression
<- glm(death_cancer ~ fu + year8594 + sex + agegrp + offset(log(pt)),
poisson9e family = poisson, data = melanoma.spl)
library(gtsummary)
tbl_merge(
list(
tbl_regression(poisson9e, exponentiate = TRUE, estimate_fun = ~ style_ratio(.x, digits = 4), include = !fu),
tbl_regression(coxfit9e, exponentiate = TRUE, estimate_fun = ~ style_ratio(.x, digits = 4))
),tab_spanner = c("**Poisson**", "**Cox**")
)
Characteristic | Poisson | Cox | ||||
---|---|---|---|---|---|---|
IRR1 | 95% CI1 | p-value | HR1 | 95% CI1 | p-value | |
year8594 | ||||||
Diagnosed 75-84 | — | — | — | — | ||
Diagnosed 85-94 | 0.7224 | 0.6344, 0.8224 | <0.001 | 0.7165 | 0.6293, 0.8157 | <0.001 |
sex | ||||||
Male | — | — | — | — | ||
Female | 0.5875 | 0.5167, 0.6678 | <0.001 | 0.5882 | 0.5174, 0.6688 | <0.001 |
agegrp | ||||||
0-44 | — | — | — | — | ||
45-59 | 1.3278 | 1.1048, 1.5985 | 0.003 | 1.3269 | 1.1032, 1.5959 | 0.003 |
60-74 | 1.8624 | 1.5603, 2.2285 | <0.001 | 1.8590 | 1.5557, 2.2215 | <0.001 |
75+ | 3.4003 | 2.7696, 4.1722 | <0.001 | 3.3804 | 2.7547, 4.1483 | <0.001 |
1 IRR = Incidence Rate Ratio, CI = Confidence Interval, HR = Hazard Ratio |
(f)
### Cox regression model
<- coxph(Surv(surv_mm, death_cancer) ~ year8594 + sex + agegrp,
coxfit9f data = melanoma.l2, ties = "breslow")
### Poisson regression model
<- survfit(Surv(surv_mm, death_cancer) ~ 1, data = melanoma.l2 )
sfit9f
## Split follow up by event times
<- survSplit(melanoma.l2, cut = sfit9f$time, end = "surv_mm", start = "start",
melanoma.spl2 event = "death_cancer")
## Calculate person-time and recode start time as a factor
<- mutate(melanoma.spl2,
melanoma.spl2 pt = surv_mm - start,
fu = as.factor(start))
## Collapse
<- melanoma.spl2 %>%
melanoma.spl3 group_by(fu, year8594, sex, agegrp) %>%
summarise(pt = sum(pt), death_cancer = sum(death_cancer)) %>%
data.frame
## Run Poisson regression
<- glm(death_cancer ~ fu + year8594 + sex + agegrp + offset(log(pt)),
poisson9f family = poisson, data = melanoma.spl3)
tbl_merge(
list(
tbl_regression(poisson9f, exponentiate = TRUE, estimate_fun = ~ style_ratio(.x, digits = 4), include = !fu),
tbl_regression(coxfit9f, exponentiate = TRUE, estimate_fun = ~ style_ratio(.x, digits = 4))
),tab_spanner = c("**Poisson**", "**Cox**")
)
Characteristic | Poisson | Cox | ||||
---|---|---|---|---|---|---|
IRR1 | 95% CI1 | p-value | HR1 | 95% CI1 | p-value | |
year8594 | ||||||
Diagnosed 75-84 | — | — | — | — | ||
Diagnosed 85-94 | 0.7169 | 0.6297, 0.8162 | <0.001 | 0.7169 | 0.6297, 0.8162 | <0.001 |
sex | ||||||
Male | — | — | — | — | ||
Female | 0.5888 | 0.5179, 0.6694 | <0.001 | 0.5888 | 0.5179, 0.6694 | <0.001 |
agegrp | ||||||
0-44 | — | — | — | — | ||
45-59 | 1.3264 | 1.1028, 1.5953 | 0.003 | 1.3264 | 1.1028, 1.5953 | 0.003 |
60-74 | 1.8573 | 1.5543, 2.2194 | <0.001 | 1.8573 | 1.5543, 2.2194 | <0.001 |
75+ | 3.3727 | 2.7484, 4.1387 | <0.001 | 3.3727 | 2.7484, 4.1387 | <0.001 |
1 IRR = Incidence Rate Ratio, CI = Confidence Interval, HR = Hazard Ratio |
(g)
## split and collapse
<- seq(0, max(sfit9f$time), by = 3)
cuts.splines <- cuts.splines + 1.5
mid.splines <-
melanoma.spl4 survSplit(Surv(surv_mm, death_cancer) ~ ., data = melanoma.l2, cut = cuts.splines,
start = "tstart", end = "tstop") %>%
mutate(cut = cut(tstop, cuts.splines),
mid = mid.splines[unclass(cut)]) %>%
group_by(mid, year8594, sex, agegrp) %>%
summarise(pt = sum(tstop - tstart),
death_cancer = sum(death_cancer))
<- glm(death_cancer ~ ns(mid, df = 3) + year8594 + sex + agegrp + offset(log(pt)),
poisson9g family = poisson, data = melanoma.spl4)
tidy(poisson9g)
# A tibble: 9 x 7
term estimate std.error statistic p.value conf.low conf.high
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 (Intercept) 0.00139 0.126 -52.1 0 0.00108 0.00177
2 ns(mid, df = 3)1 0.444 0.168 -4.85 1.26e- 6 0.318 0.614
3 ns(mid, df = 3)2 3.49 0.273 4.58 4.55e- 6 2.05 5.97
4 ns(mid, df = 3)3 0.370 0.194 -5.11 3.18e- 7 0.249 0.534
5 year8594Diagnosed 85~ 0.724 0.0662 -4.88 1.04e- 6 0.635 0.824
6 sexFemale 0.588 0.0655 -8.12 4.61e-16 0.517 0.668
7 agegrp45-59 1.33 0.0942 3.02 2.51e- 3 1.11 1.60
8 agegrp60-74 1.86 0.0909 6.86 7.01e-12 1.56 2.23
9 agegrp75+ 3.41 0.104 11.7 8.86e-32 2.77 4.18
## quick approach: use the effects package
library(effects)
plot(Effect("mid", poisson9g))
## utility function to draw a confidence interval
<- function(time, interval, col = "lightgrey")
polygon.ci polygon(c(time, rev(time)), c(interval[,1], rev(interval[,2])), col = col, border = col)
## define exposures
<- seq(0, max(cuts.splines), length = 1000)
times <- diff(times)[1]
delta <- data.frame(mid = times + delta/2, year8594 = "Diagnosed 85-94",
newdata sex = "Male", agegrp = "45-59",
pt = 1)
## predict rates and 95% CI
<- predict(poisson9g, newdata = newdata, se.fit = TRUE)
pred <- exp(pred$fit)
predrate <- with(pred, exp(cbind(fit - 1.96*se.fit, fit + 1.96*se.fit)))
ci
## plot
matplot(newdata$mid, ci, type = "n",
xlab = "Time since diagnosis (months)",
ylab = "Rate",
main = "Males aged 45-59 years diagnosed 1985-94")
polygon.ci(newdata$mid, ci)
lines(newdata$mid, predrate)
## predict survival and 95% CI
library(rstpm2)
<- rstpm2::predictnl(poisson9g, newdata = newdata,
logcumhaz fun = function(fit, newdata) log(cumsum(delta*predict(fit, newdata, type = "response"))))
<- exp(-exp(logcumhaz$Estimate))
surv <- with(logcumhaz, exp(-exp(cbind(Estimate - 1.96*SE, Estimate + 1.96*SE))))
ci
## plot
matplot(newdata$mid, ci, type = "n",
xlab = "Time since diagnosis (months)",
ylab = "Survival",
main = "Males aged 45-59 years diagnosed 1985-94")
polygon.ci(newdata$mid, ci)
lines(newdata$mid, surv)
Exercise 10. Examining the proportional hazards hypothesis
## Load packages
library(biostat3)
library(dplyr)
library(car)
## Data manipulation
<- biostat3::melanoma %>%
localised filter(stage == "Localised") %>%
mutate(death_cancer = ifelse(status == "Dead: cancer" & surv_mm <= 120, 1, 0), # censoring at 120 months
trunc_yy = pmin(surv_mm/12, 10)) # scale to years and truncate at 10 years
(a)
# Using muhaz2 to smooth the Kaplan-Meier hazards by strata
<- muhaz2(Surv(trunc_yy, death_cancer) ~ year8594, data = localised)
hazDiaDate
## Comparing hazards
plot(hazDiaDate, haz.scale = 1000,
xlab = "Time since diagnosis (years)",
ylab = "Hazard per 1000 person-years")
(b)
## Comparing hazards on the log scale
plot(hazDiaDate, haz.scale = 1000, log = "y",
xlab = "Time since diagnosis (years)",
ylab = "Hazard per 1000 person-years")
(c)
## Calculating -log cumulative hazards per strata
<- survfit(Surv(trunc_yy, death_cancer) ~ year8594, data = localised)
survfit1 plot(survfit1, col = 1:2, fun = function(S) -log(-log(S)), log = "x",
xlab = "log(time)", ylab = "-log(H)")
legend("topright", legend = levels(localised$year8594), col = 1:2, lty = 1)
(d)
<- coxph(Surv(trunc_yy, death_cancer) ~ year8594, data = localised)
cox1 tidy(cox1)
# A tibble: 1 x 7
term estimate std.error statistic p.value conf.low conf.high
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 year8594Diagnosed 85-~ 0.776 0.0658 -3.84 1.21e-4 0.683 0.883
(e)
<- coxph(Surv(trunc_yy, death_cancer) ~ sex + year8594 + agegrp, data = localised)
cox2 tidy(cox2)
# A tibble: 5 x 7
term estimate std.error statistic p.value conf.low conf.high
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 sexFemale 0.588 0.0655 -8.11 5.19e-16 0.517 0.669
2 year8594Diagnosed 85~ 0.716 0.0662 -5.04 4.72e- 7 0.629 0.816
3 agegrp45-59 1.33 0.0942 3.00 2.67e- 3 1.10 1.60
4 agegrp60-74 1.86 0.0909 6.82 8.90e-12 1.56 2.22
5 agegrp75+ 3.38 0.104 11.7 1.97e-31 2.75 4.15
## Plot of the scaled Schoenfeld residuals for calendar period.
## The smooth line shows the estimated log hazard ratio as a function of time.
<- cox.zph(cox2, transform = "identity") # Stata appears to be using 'identity'
cox2.phtest plot(cox2.phtest[2], resid = TRUE, se = TRUE, main = "Schoenfeld residuals", ylim = c(-4,4))
(f)
par(mfrow = c(2, 2))
plot(muhaz2(Surv(trunc_yy, death_cancer) ~ agegrp, data = localised), haz.scale = 1000, log = "y", lty = 1,
xlab = "Time since diagnosis (years)",
ylab = "Hazard per 1000 person-years")
plot(survfit(Surv(trunc_yy, death_cancer) ~ agegrp, data = localised), col = 1:4,
fun = function(S) -log(-log(S)), log = "x",
xlab = "log(time)", ylab = "-log(H)")
legend("topright", legend = levels(localised$agegrp), col = 1:4, lty = 1)
plot(cox2.phtest[3], resid = TRUE, se = TRUE, main = "Schoenfeld residuals", ylim = c(-4,4))
(g)
## The results from the previous proportional hazards assumption test
print(cox2.phtest)
chisq df p
sex 1.17 1 0.2784
year8594 1.57 1 0.2096
agegrp 15.93 3 0.0012
GLOBAL 20.45 5 0.0010
(h)
<- survSplit(localised, cut = 2, end = "trunc_yy", start = "start",
melanoma2p8Split event = "death_cancer", episode = "fu") %>%
mutate(fu = as.factor(fu))
%>% select(id, start, trunc_yy) %>% filter(id <= 3) %>% arrange(id, trunc_yy) melanoma2p8Split
id start trunc_yy
1 1 0 2.000000
2 1 2 2.208333
3 2 0 2.000000
4 2 2 4.625000
5 3 0 2.000000
6 3 2 10.000000
<- coxph(Surv(start, trunc_yy, death_cancer) ~ sex + year8594 + agegrp*fu,
cox2p8Split1 data = melanoma2p8Split)
tidy(cox2p8Split1)
# A tibble: 9 x 7
term estimate std.error statistic p.value conf.low conf.high
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 sexFemale 0.590 0.0654 -8.06 7.58e-16 0.519 0.671
2 year8594Diagnosed 8~ 0.715 0.0662 -5.07 4.08e- 7 0.628 0.814
3 agegrp45-59 1.70 0.196 2.70 6.89e- 3 1.16 2.50
4 agegrp60-74 2.46 0.187 4.80 1.55e- 6 1.70 3.55
5 agegrp75+ 5.42 0.192 8.81 1.26e-18 3.72 7.89
6 fu2 NA 0 NA NA NA NA
7 agegrp45-59:fu2 0.725 0.224 -1.43 1.52e- 1 0.468 1.12
8 agegrp60-74:fu2 0.693 0.215 -1.71 8.72e- 2 0.455 1.06
9 agegrp75+:fu2 0.493 0.232 -3.05 2.29e- 3 0.313 0.776
The regression equation is
\[\begin{aligned} h(t|\text{year8594,sex,agegrp,fu}) & = h_0(t)\exp\bigg(β_1I(\text{sex=``Female"}) + β_2I(\text{year8594=``Diagnosed 85-94"}) \\ & + β_3I(\text{agegrp=``45-59"}) + β_4I(\text{agegrp=``60-74"}) + β_5I(\text{agegrp=``75+"}) \\ & + β_6I(\text{agegrp=``45-59" \& fu=2}) + β_7I(\text{agegrp=``60-74" \& fu=2}) + β_8I(\text{agegrp=``75+" \& fu=2})\bigg) \end{aligned}\]## LR test
<- coxph(Surv(start, trunc_yy, death_cancer) ~ sex + year8594 + agegrp + fu,
cox2p8Split1b data = melanoma2p8Split)
anova(cox2p8Split1, cox2p8Split1b)
Analysis of Deviance Table
Cox model: response is Surv(start, trunc_yy, death_cancer)
Model 1: ~ sex + year8594 + agegrp * fu
Model 2: ~ sex + year8594 + agegrp + fu
loglik Chisq Df P(>|Chi|)
1 -7787.7
2 -7792.7 9.8356 3 0.02002 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(i)
## Reparameterise the model
<- coxph(Surv(start, trunc_yy, death_cancer) ~ sex + year8594 + fu + fu:agegrp,
cox2p8Split2 data = melanoma2p8Split)
tidy(cox2p8Split2)
# A tibble: 9 x 7
term estimate std.error statistic p.value conf.low conf.high
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 sexFemale 0.590 0.0654 -8.06 7.58e-16 0.519 0.671
2 year8594Diagnosed 8~ 0.715 0.0662 -5.07 4.08e- 7 0.628 0.814
3 fu2 NA 0 NA NA NA NA
4 fu1:agegrp45-59 1.70 0.196 2.70 6.89e- 3 1.16 2.50
5 fu2:agegrp45-59 1.23 0.108 1.95 5.17e- 2 0.998 1.52
6 fu1:agegrp60-74 2.46 0.187 4.80 1.55e- 6 1.70 3.55
7 fu2:agegrp60-74 1.70 0.105 5.09 3.59e- 7 1.39 2.09
8 fu1:agegrp75+ 5.42 0.192 8.81 1.26e-18 3.72 7.89
9 fu2:agegrp75+ 2.67 0.132 7.46 8.75e-14 2.06 3.45
The regression equation is
\[\begin{aligned} h(t|\text{year8594,sex,agegrp,fu}) & = h_0(t)\exp\bigg(β_1I(\text{sex=``Female"}) + β_2I(\text{year8594=``Diagnosed 85-94"}) \\ & + β_3I(\text{agegrp=``45-59" \& fu=1}) + β_4I(\text{agegrp=``45-59" \& fu=2}) + β_5I(\text{agegrp=``60-74" \& fu=1}) \\ & + β_6I(\text{agegrp=``60-74" \& fu=2}) + β_7I(\text{agegrp=``75+" \& fu=1}) + β_8I(\text{agegrp=``75+" \& fu=2})\bigg) \end{aligned}\]0�“2 years | 2+ years | |
---|---|---|
agegrp0-44 | 1.00 | 1.00 |
agegrp45-59 | 1.70 | 1.23 |
agegrp60-74 | 2.46 | 1.70 |
agegrp75+ | 5.42 | 2.67 |
## Alternative approach using tt():
## https://cran.r-project.org/web/packages/survival/vignettes/timedep.pdf
<- coxph(Surv(trunc_yy, death_cancer) ~ sex + year8594 + agegrp +
cox2p8tvc2 tt(agegrp=="45-59") + tt(agegrp=="60-74") + tt(agegrp=="75+"),
data = localised,
tt = function(x, t, ...) x*(t>=2))
tidy(cox2p8tvc2)
# A tibble: 8 x 7
term estimate std.error statistic p.value conf.low conf.high
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 "sexFemale" 0.590 0.0654 -8.06 7.58e-16 0.519 0.671
2 "year8594Diagnosed 8~ 0.715 0.0662 -5.07 4.08e- 7 0.628 0.814
3 "agegrp45-59" 1.70 0.196 2.70 6.89e- 3 1.16 2.50
4 "agegrp60-74" 2.46 0.187 4.80 1.55e- 6 1.70 3.55
5 "agegrp75+" 5.42 0.192 8.81 1.26e-18 3.72 7.89
6 "tt(agegrp == \"45-5~ 0.725 0.224 -1.43 1.52e- 1 0.468 1.12
7 "tt(agegrp == \"60-7~ 0.693 0.215 -1.71 8.72e- 2 0.455 1.06
8 "tt(agegrp == \"75+\~ 0.493 0.232 -3.05 2.29e- 3 0.313 0.776
<- coxph(Surv(trunc_yy, death_cancer) ~ sex + year8594 + agegrp + tt(agegrp),
cox2p8tvct data = localised,
tt = function(x, t, ...) cbind(`45-59`=(x=="45-59")*t,
`60-74`=(x=="60-74")*t,
`75+`=(x=="75+")*t))
tidy(cox2p8tvct)
# A tibble: 8 x 7
term estimate std.error statistic p.value conf.low conf.high
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 sexFemale 0.592 0.0654 -8.02 1.10e-15 0.521 0.673
2 year8594Diagnosed 85~ 0.713 0.0663 -5.10 3.48e- 7 0.627 0.812
3 agegrp45-59 1.64 0.186 2.67 7.56e- 3 1.14 2.36
4 agegrp60-74 2.70 0.179 5.57 2.52e- 8 1.91 3.84
5 agegrp75+ 6.64 0.205 9.26 2.07e-20 4.45 9.92
6 tt(agegrp)45-59 0.949 0.0405 -1.30 1.95e- 1 0.876 1.03
7 tt(agegrp)60-74 0.906 0.0404 -2.44 1.49e- 2 0.837 0.981
8 tt(agegrp)75+ 0.806 0.0574 -3.77 1.65e- 4 0.720 0.901
lincom(cox2p8tvct, "agegrp75+", eform = TRUE) # t=0
Estimate 2.5 % 97.5 % Chisq Pr(>Chisq)
agegrp75+ 6.644778 4.450099 9.921819 85.72268 2.070247e-20
lincom(cox2p8tvct, "agegrp75+ + tt(agegrp)75+", eform = TRUE) # t=1
Estimate 2.5 % 97.5 % Chisq Pr(>Chisq)
agegrp75+ + tt(agegrp)75+ 5.35269 3.915088 7.318171 110.5215 7.532419e-26
lincom(cox2p8tvct, "agegrp75+ + 2*tt(agegrp)75+", eform = TRUE) # t=2
Estimate 2.5 % 97.5 % Chisq Pr(>Chisq)
agegrp75+ + 2*tt(agegrp)75+ 4.31185 3.373556 5.511113 136.2282 1.778638e-31
<- coxph(Surv(trunc_yy, death_cancer) ~ sex + year8594 + agegrp + tt(agegrp),
cox2p8tvclogt data = localised,
tt = function(x, t, ...) cbind(`45-59`=(x=="45-59")*log(t),
`60-74`=(x=="60-74")*log(t),
`75+`=(x=="75+")*log(t)))
tidy(cox2p8tvclogt)
# A tibble: 8 x 7
term estimate std.error statistic p.value conf.low conf.high
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 sexFemale 0.592 0.0654 -8.01 1.15e-15 0.521 0.673
2 year8594Diagnosed 85~ 0.713 0.0663 -5.10 3.38e- 7 0.626 0.812
3 agegrp45-59 1.77 0.195 2.92 3.47e- 3 1.21 2.59
4 agegrp60-74 2.77 0.185 5.51 3.51e- 8 1.93 3.99
5 agegrp75+ 6.68 0.190 10.0 1.26e-23 4.61 9.69
6 tt(agegrp)45-59 0.789 0.143 -1.66 9.75e- 2 0.595 1.04
7 tt(agegrp)60-74 0.708 0.138 -2.49 1.26e- 2 0.540 0.929
8 tt(agegrp)75+ 0.480 0.158 -4.63 3.67e- 6 0.352 0.655
# t=0.5 => log(t)=-0.6931472
lincom(cox2p8tvclogt, "agegrp75+ - 0.6931472*tt(agegrp)75+", eform = TRUE)
Estimate 2.5 % 97.5 % Chisq
agegrp75+ - 0.6931472*tt(agegrp)75+ 11.10348 6.347921 19.42166 71.20567
Pr(>Chisq)
agegrp75+ - 0.6931472*tt(agegrp)75+ 3.218622e-17
# t=1 => log(t)=0
lincom(cox2p8tvclogt, "agegrp75+", eform = TRUE)
Estimate 2.5 % 97.5 % Chisq Pr(>Chisq)
agegrp75+ 6.679893 4.606969 9.685537 100.3693 1.264746e-23
# t=2 => log(t)=0.6931472
lincom(cox2p8tvclogt, "agegrp75+ + 0.6931472*tt(agegrp)75+", eform = TRUE)
Estimate 2.5 % 97.5 % Chisq
agegrp75+ + 0.6931472*tt(agegrp)75+ 4.018649 3.17166 5.091825 132.6644
Pr(>Chisq)
agegrp75+ + 0.6931472*tt(agegrp)75+ 1.070656e-30
The regression equation for the cox2p8tvct
model is
(j)
library(splines)
<- seq(0, 10, length = 100)
time.cuts <- diff(time.cuts)[1]
delta
## split and collapse
<- survSplit(Surv(trunc_yy,death_cancer) ~ ., data = localised,
melanoma2p8Split2 cut = time.cuts, end = "tstop", start = "tstart",
event = "death_cancer") %>%
mutate(fu = cut(tstop, time.cuts),
mid = time.cuts[unclass(fu)] + delta/2) %>%
group_by(mid, sex, year8594, agegrp) %>%
summarise(pt = sum(tstop - tstart), death_cancer = sum(death_cancer)) %>%
mutate(age75 = (agegrp == "75+") + 0)
<- glm(death_cancer ~ sex + year8594 + agegrp + ns(mid, df=3) +
poisson2p8tvc :ns(mid, df=3) + offset(log(pt)),
age75data = melanoma2p8Split2, family = poisson)
## utility function to draw a confidence interval
<- function(time, interval, col = "lightgrey")
polygon.ci polygon(c(time, rev(time)), c(interval[,1], rev(interval[,2])), col = col, border = col)
## define exposures
<- data.frame(mid = seq(0, max(time.cuts), length = 100), year8594 = "Diagnosed 85-94",
newdata sex = "Male", agegrp = "75+", age75 = 1, pt = 1)
## predict IRR and 95% CI
library(rstpm2)
<- rstpm2::predictnl(poisson2p8tvc, newdata = newdata,
logirr fun = function(fit, newdata) predict(fit, newdata) -
predict(fit, transform(newdata, agegrp = '0-44', age75 = 0)))
<- exp(logirr$fit)
pred <- exp(confint(logirr))
ci
## plot
matplot(newdata$mid, ci, type = "n",
xlab = "Time since diagnosis (months)",
ylab = "Rate ratio",
main = "Aged 75+ compared with aged 0-44 years")
polygon.ci(newdata$mid, ci)
lines(newdata$mid, pred)