Lab Review For Day 3

Exercise 9. Modelling cause-specific mortality using Cox regression

## Load packages
library(biostat3)
library(dplyr)     # for data manipulation
library(splines)   # ns (recommended package)
## Data manipulation
melanoma.l <- biostat3::melanoma %>%
    filter(stage == "Localised") %>%
    mutate(death_cancer = as.numeric(status == "Dead: cancer"))

melanoma.l2 <- mutate(melanoma.l,
                      ## Create a death indicator (only count deaths within 120 months)
                      death_cancer = death_cancer * as.numeric(surv_mm <= 120),
                      ## Create a new time variable
                      surv_mm = pmin(surv_mm, 120))

(a)

coxfit9a <- coxph(Surv(surv_mm, death_cancer) ~ year8594, data = melanoma.l2)
tidy(coxfit9a)
# A tibble: 1 x 7
  term                   estimate std.error statistic p.value conf.low conf.high
  <chr>                     <dbl>     <dbl>     <dbl>   <dbl>    <dbl>     <dbl>
1 year8594Diagnosed 85-~    0.776    0.0658     -3.84 1.21e-4    0.683     0.883

Mortality due to skin melanoma has decreased by 22% in the latter period compared to the earlier period.

The regression equation is

\[ h(t|\text{year8594})=h_0(t)\exp(β_1I(\text{year8594=``Diagnosed 85-94"})) \]

Plot the hazard rates

par(mfrow = c(1,2))

plot(muhaz2(Surv(surv_mm/12, death_cancer) ~ year8594, data = melanoma.l2),
     xlab = "Years since diagnosis", col = c("blue","red"), lty = 1:2)

plot(muhaz2(Surv(surv_mm/12, death_cancer) ~ year8594, data = melanoma.l2),
     xlab = "Years since diagnosis", col = c("blue","red"), lty = 1:2, log = "y")

(b)

## Log-rank test
survdiff(Surv(surv_mm, death_cancer) ~ year8594, data = melanoma.l2)
Call:
survdiff(formula = Surv(surv_mm, death_cancer) ~ year8594, data = melanoma.l2)

                            N Observed Expected (O-E)^2/E (O-E)^2/V
year8594=Diagnosed 75-84 2145      519      460      7.44      14.9
year8594=Diagnosed 85-94 3173      441      500      6.86      14.9

 Chisq= 14.9  on 1 degrees of freedom, p= 1e-04 
## Wald test & Likelihood ratio test
summary( coxfit9b <- coxph(Surv(surv_mm, death_cancer) ~ year8594, data = melanoma.l2) )
Call:
coxph(formula = Surv(surv_mm, death_cancer) ~ year8594, data = melanoma.l2)

  n= 5318, number of events= 960 

                            coef exp(coef) se(coef)      z Pr(>|z|)    
year8594Diagnosed 85-94 -0.25297   0.77649  0.06579 -3.845 0.000121 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

                        exp(coef) exp(-coef) lower .95 upper .95
year8594Diagnosed 85-94    0.7765      1.288    0.6825    0.8834

Concordance= 0.533  (se = 0.008 )
Likelihood ratio test= 14.83  on 1 df,   p=1e-04
Wald test            = 14.78  on 1 df,   p=1e-04
Score (logrank) test = 14.86  on 1 df,   p=1e-04

(c)

coxfit9c <- coxph(Surv(surv_mm, death_cancer) ~ year8594 + sex + agegrp, data = melanoma.l2)
tidy(coxfit9c)
# A tibble: 5 x 7
  term                  estimate std.error statistic  p.value conf.low conf.high
  <chr>                    <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl>
1 year8594Diagnosed 85~    0.716    0.0662     -5.04 4.72e- 7    0.629     0.816
2 sexFemale                0.588    0.0655     -8.11 5.19e-16    0.517     0.669
3 agegrp45-59              1.33     0.0942      3.00 2.67e- 3    1.10      1.60 
4 agegrp60-74              1.86     0.0909      6.82 8.90e-12    1.56      2.22 
5 agegrp75+                3.38     0.104      11.7  1.97e-31    2.75      4.15 

The regression equation is

\[\begin{aligned} h(t|\text{year8594,sex,agegrp}) & = h_0(t)\exp\bigg(β_1I(\text{year8594=``Diagnosed 85-94"})+β_2I(\text{sex=``Female"}) \\ & + β_3I(\text{agegrp=``45-59"})+β_4I(\text{agegrp=``60-74"})+β_5I(\text{agegrp=``75+"})\bigg) \end{aligned}\]
## Test if the effect of age is significant
## Use a Wald test with the car package
library(car)
linearHypothesis(coxfit9c, c("agegrp45-59 = 0", "agegrp60-74 = 0", "agegrp75+ = 0"))
Linear hypothesis test

Hypothesis:
agegrp45 - 59 = 0
agegrp60 - 74 = 0
agegrp75  +  = 0

Model 1: restricted model
Model 2: Surv(surv_mm, death_cancer) ~ year8594 + sex + agegrp

  Res.Df Df  Chisq Pr(>Chisq)    
1   5316                         
2   5313  3 154.38  < 2.2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(d)

## Test if the effect of age is significant
## Use an LR test with the anova function
## Fit a model without agegrp
coxfit9d <- coxph(Surv(surv_mm, death_cancer) ~ year8594 + sex, data = melanoma.l2)

## LR test
anova(coxfit9c, coxfit9d)
Analysis of Deviance Table
 Cox model: response is  Surv(surv_mm, death_cancer)
 Model 1: ~ year8594 + sex + agegrp
 Model 2: ~ year8594 + sex
   loglik  Chisq Df P(>|Chi|)    
1 -7792.7                        
2 -7864.4 143.37  3 < 2.2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(coxfit9c)
Analysis of Deviance Table
 Cox model: response is Surv(surv_mm, death_cancer)
Terms added sequentially (first to last)

          loglik   Chisq Df Pr(>|Chi|)    
NULL     -7899.0                          
year8594 -7891.6  14.829  1  0.0001177 ***
sex      -7864.4  54.495  1  1.559e-13 ***
agegrp   -7792.7 143.365  3  < 2.2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(e)

### Cox regression model 
coxfit9e <- coxph(Surv(surv_mm, death_cancer) ~ year8594 + sex + agegrp, 
                  data = melanoma.l2)

### Poisson regression model 
## Split follow up by year
melanoma.spl <- survSplit(melanoma.l2, cut = 12*(0:10), end = "surv_mm", start = "start", 
                          event = "death_cancer")

## Calculate person-time and recode start time as a factor
melanoma.spl <- mutate(melanoma.spl,
                       pt = surv_mm - start,
                       fu = as.factor(start))

## Run Poisson regression
poisson9e <- glm(death_cancer ~ fu + year8594 + sex + agegrp + offset(log(pt)),
                 family = poisson, data = melanoma.spl)

library(gtsummary)
tbl_merge(
  list(
    tbl_regression(poisson9e, exponentiate = TRUE, estimate_fun = ~ style_ratio(.x, digits = 4), include = !fu),
    tbl_regression(coxfit9e, exponentiate = TRUE, estimate_fun = ~ style_ratio(.x, digits = 4))
  ),
  tab_spanner = c("**Poisson**", "**Cox**")
)
Characteristic Poisson Cox
IRR1 95% CI1 p-value HR1 95% CI1 p-value
year8594
Diagnosed 75-84
Diagnosed 85-94 0.7224 0.6344, 0.8224 <0.001 0.7165 0.6293, 0.8157 <0.001
sex
Male
Female 0.5875 0.5167, 0.6678 <0.001 0.5882 0.5174, 0.6688 <0.001
agegrp
0-44
45-59 1.3278 1.1048, 1.5985 0.003 1.3269 1.1032, 1.5959 0.003
60-74 1.8624 1.5603, 2.2285 <0.001 1.8590 1.5557, 2.2215 <0.001
75+ 3.4003 2.7696, 4.1722 <0.001 3.3804 2.7547, 4.1483 <0.001
1 IRR = Incidence Rate Ratio, CI = Confidence Interval, HR = Hazard Ratio

(f)

### Cox regression model 
coxfit9f <- coxph(Surv(surv_mm, death_cancer) ~ year8594 + sex + agegrp, 
                  data = melanoma.l2, ties = "breslow")

### Poisson regression model 
sfit9f <- survfit(Surv(surv_mm, death_cancer) ~ 1, data = melanoma.l2 )

## Split follow up by event times
melanoma.spl2 <- survSplit(melanoma.l2, cut = sfit9f$time, end = "surv_mm", start = "start",
                           event = "death_cancer")

## Calculate person-time and recode start time as a factor
melanoma.spl2 <- mutate(melanoma.spl2,
                        pt = surv_mm - start,
                        fu = as.factor(start))

## Collapse
melanoma.spl3 <- melanoma.spl2 %>%
    group_by(fu, year8594, sex, agegrp) %>%
    summarise(pt = sum(pt), death_cancer = sum(death_cancer)) %>%
    data.frame

## Run Poisson regression
poisson9f <- glm(death_cancer ~ fu + year8594 + sex + agegrp + offset(log(pt)),
                 family = poisson, data = melanoma.spl3)

tbl_merge(
  list(
    tbl_regression(poisson9f, exponentiate = TRUE, estimate_fun = ~ style_ratio(.x, digits = 4), include = !fu),
    tbl_regression(coxfit9f, exponentiate = TRUE, estimate_fun = ~ style_ratio(.x, digits = 4))
  ),
  tab_spanner = c("**Poisson**", "**Cox**")
)
Characteristic Poisson Cox
IRR1 95% CI1 p-value HR1 95% CI1 p-value
year8594
Diagnosed 75-84
Diagnosed 85-94 0.7169 0.6297, 0.8162 <0.001 0.7169 0.6297, 0.8162 <0.001
sex
Male
Female 0.5888 0.5179, 0.6694 <0.001 0.5888 0.5179, 0.6694 <0.001
agegrp
0-44
45-59 1.3264 1.1028, 1.5953 0.003 1.3264 1.1028, 1.5953 0.003
60-74 1.8573 1.5543, 2.2194 <0.001 1.8573 1.5543, 2.2194 <0.001
75+ 3.3727 2.7484, 4.1387 <0.001 3.3727 2.7484, 4.1387 <0.001
1 IRR = Incidence Rate Ratio, CI = Confidence Interval, HR = Hazard Ratio

(g)

## split and collapse
cuts.splines <- seq(0, max(sfit9f$time), by = 3)
mid.splines <- cuts.splines + 1.5
melanoma.spl4 <-
    survSplit(Surv(surv_mm, death_cancer) ~ ., data = melanoma.l2, cut = cuts.splines,
              start = "tstart", end = "tstop") %>%
    mutate(cut = cut(tstop, cuts.splines),
           mid = mid.splines[unclass(cut)]) %>%
    group_by(mid, year8594, sex, agegrp) %>%
    summarise(pt = sum(tstop - tstart), 
              death_cancer = sum(death_cancer))

poisson9g <- glm(death_cancer ~ ns(mid, df = 3) + year8594 + sex + agegrp + offset(log(pt)),
                 family = poisson, data = melanoma.spl4)
tidy(poisson9g)
# A tibble: 9 x 7
  term                  estimate std.error statistic  p.value conf.low conf.high
  <chr>                    <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl>
1 (Intercept)            0.00139    0.126     -52.1  0         0.00108   0.00177
2 ns(mid, df = 3)1       0.444      0.168      -4.85 1.26e- 6  0.318     0.614  
3 ns(mid, df = 3)2       3.49       0.273       4.58 4.55e- 6  2.05      5.97   
4 ns(mid, df = 3)3       0.370      0.194      -5.11 3.18e- 7  0.249     0.534  
5 year8594Diagnosed 85~  0.724      0.0662     -4.88 1.04e- 6  0.635     0.824  
6 sexFemale              0.588      0.0655     -8.12 4.61e-16  0.517     0.668  
7 agegrp45-59            1.33       0.0942      3.02 2.51e- 3  1.11      1.60   
8 agegrp60-74            1.86       0.0909      6.86 7.01e-12  1.56      2.23   
9 agegrp75+              3.41       0.104      11.7  8.86e-32  2.77      4.18   
## quick approach: use the effects package
library(effects)
plot(Effect("mid", poisson9g))

## utility function to draw a confidence interval
polygon.ci <- function(time, interval, col = "lightgrey") 
    polygon(c(time, rev(time)), c(interval[,1], rev(interval[,2])), col = col, border = col)

## define exposures
times <- seq(0, max(cuts.splines), length = 1000)
delta <- diff(times)[1]
newdata <- data.frame(mid = times + delta/2, year8594 = "Diagnosed 85-94",
                      sex = "Male", agegrp = "45-59",
                      pt = 1)

## predict rates and 95% CI
pred <- predict(poisson9g, newdata = newdata, se.fit = TRUE)
predrate <- exp(pred$fit)
ci <- with(pred, exp(cbind(fit - 1.96*se.fit, fit + 1.96*se.fit)))

## plot
matplot(newdata$mid, ci, type = "n", 
        xlab = "Time since diagnosis (months)",
        ylab = "Rate",
        main = "Males aged 45-59 years diagnosed 1985-94")
polygon.ci(newdata$mid, ci) 
lines(newdata$mid, predrate)

## predict survival and 95% CI
library(rstpm2)
logcumhaz <- rstpm2::predictnl(poisson9g, newdata = newdata,
                               fun = function(fit, newdata) log(cumsum(delta*predict(fit, newdata, type = "response"))))
surv <- exp(-exp(logcumhaz$Estimate))
ci <- with(logcumhaz, exp(-exp(cbind(Estimate - 1.96*SE, Estimate + 1.96*SE))))

## plot
matplot(newdata$mid, ci, type = "n", 
        xlab = "Time since diagnosis (months)",
        ylab = "Survival", 
        main = "Males aged 45-59 years diagnosed 1985-94")
polygon.ci(newdata$mid, ci) 
lines(newdata$mid, surv)

Exercise 10. Examining the proportional hazards hypothesis

## Load packages
library(biostat3)
library(dplyr)    
library(car)
## Data manipulation
localised <- biostat3::melanoma %>% 
    filter(stage == "Localised") %>%
    mutate(death_cancer = ifelse(status == "Dead: cancer" & surv_mm <= 120, 1, 0), # censoring at 120 months
           trunc_yy = pmin(surv_mm/12, 10))  # scale to years and truncate at 10 years

(a)

# Using muhaz2 to smooth the Kaplan-Meier hazards by strata
hazDiaDate <- muhaz2(Surv(trunc_yy, death_cancer) ~ year8594, data = localised)

## Comparing hazards
plot(hazDiaDate, haz.scale = 1000,
     xlab = "Time since diagnosis (years)", 
     ylab = "Hazard per 1000 person-years")

(b)

## Comparing hazards on the log scale
plot(hazDiaDate, haz.scale = 1000, log = "y",
     xlab = "Time since diagnosis (years)", 
     ylab = "Hazard per 1000 person-years")

(c)

## Calculating -log cumulative hazards per strata
survfit1 <- survfit(Surv(trunc_yy, death_cancer) ~ year8594, data = localised)
plot(survfit1, col = 1:2, fun = function(S) -log(-log(S)), log = "x",
     xlab = "log(time)", ylab = "-log(H)")
legend("topright", legend = levels(localised$year8594), col = 1:2, lty = 1)

(d)

cox1 <- coxph(Surv(trunc_yy, death_cancer) ~ year8594, data = localised)
tidy(cox1)
# A tibble: 1 x 7
  term                   estimate std.error statistic p.value conf.low conf.high
  <chr>                     <dbl>     <dbl>     <dbl>   <dbl>    <dbl>     <dbl>
1 year8594Diagnosed 85-~    0.776    0.0658     -3.84 1.21e-4    0.683     0.883

(e)

cox2 <- coxph(Surv(trunc_yy, death_cancer) ~ sex + year8594 + agegrp, data = localised)
tidy(cox2)
# A tibble: 5 x 7
  term                  estimate std.error statistic  p.value conf.low conf.high
  <chr>                    <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl>
1 sexFemale                0.588    0.0655     -8.11 5.19e-16    0.517     0.669
2 year8594Diagnosed 85~    0.716    0.0662     -5.04 4.72e- 7    0.629     0.816
3 agegrp45-59              1.33     0.0942      3.00 2.67e- 3    1.10      1.60 
4 agegrp60-74              1.86     0.0909      6.82 8.90e-12    1.56      2.22 
5 agegrp75+                3.38     0.104      11.7  1.97e-31    2.75      4.15 
## Plot of the scaled Schoenfeld residuals for calendar period.
## The smooth line shows the estimated log hazard ratio as a function of time.
cox2.phtest <- cox.zph(cox2, transform = "identity") # Stata appears to be using 'identity'
plot(cox2.phtest[2], resid = TRUE, se = TRUE, main = "Schoenfeld residuals", ylim = c(-4,4))

(f)

par(mfrow = c(2, 2))

plot(muhaz2(Surv(trunc_yy, death_cancer) ~ agegrp, data = localised), haz.scale = 1000, log = "y", lty = 1,
     xlab = "Time since diagnosis (years)", 
     ylab = "Hazard per 1000 person-years")

plot(survfit(Surv(trunc_yy, death_cancer) ~ agegrp, data = localised), col = 1:4, 
     fun = function(S) -log(-log(S)), log = "x",
     xlab = "log(time)", ylab = "-log(H)")
legend("topright", legend = levels(localised$agegrp), col = 1:4, lty = 1)

plot(cox2.phtest[3], resid = TRUE, se = TRUE, main = "Schoenfeld residuals", ylim = c(-4,4))

(g)

## The results from the previous proportional hazards assumption test
print(cox2.phtest)
         chisq df      p
sex       1.17  1 0.2784
year8594  1.57  1 0.2096
agegrp   15.93  3 0.0012
GLOBAL   20.45  5 0.0010

(h)

melanoma2p8Split <- survSplit(localised, cut = 2, end = "trunc_yy", start = "start",
                              event = "death_cancer", episode = "fu") %>%
    mutate(fu = as.factor(fu))

melanoma2p8Split %>% select(id, start, trunc_yy) %>% filter(id <= 3) %>% arrange(id, trunc_yy)
  id start  trunc_yy
1  1     0  2.000000
2  1     2  2.208333
3  2     0  2.000000
4  2     2  4.625000
5  3     0  2.000000
6  3     2 10.000000
cox2p8Split1 <- coxph(Surv(start, trunc_yy, death_cancer) ~ sex + year8594 + agegrp*fu,
                      data = melanoma2p8Split)
tidy(cox2p8Split1)
# A tibble: 9 x 7
  term                 estimate std.error statistic   p.value conf.low conf.high
  <chr>                   <dbl>     <dbl>     <dbl>     <dbl>    <dbl>     <dbl>
1 sexFemale               0.590    0.0654     -8.06  7.58e-16    0.519     0.671
2 year8594Diagnosed 8~    0.715    0.0662     -5.07  4.08e- 7    0.628     0.814
3 agegrp45-59             1.70     0.196       2.70  6.89e- 3    1.16      2.50 
4 agegrp60-74             2.46     0.187       4.80  1.55e- 6    1.70      3.55 
5 agegrp75+               5.42     0.192       8.81  1.26e-18    3.72      7.89 
6 fu2                    NA        0          NA    NA          NA        NA    
7 agegrp45-59:fu2         0.725    0.224      -1.43  1.52e- 1    0.468     1.12 
8 agegrp60-74:fu2         0.693    0.215      -1.71  8.72e- 2    0.455     1.06 
9 agegrp75+:fu2           0.493    0.232      -3.05  2.29e- 3    0.313     0.776

The regression equation is

\[\begin{aligned} h(t|\text{year8594,sex,agegrp,fu}) & = h_0(t)\exp\bigg(β_1I(\text{sex=``Female"}) + β_2I(\text{year8594=``Diagnosed 85-94"}) \\ & + β_3I(\text{agegrp=``45-59"}) + β_4I(\text{agegrp=``60-74"}) + β_5I(\text{agegrp=``75+"}) \\ & + β_6I(\text{agegrp=``45-59" \& fu=2}) + β_7I(\text{agegrp=``60-74" \& fu=2}) + β_8I(\text{agegrp=``75+" \& fu=2})\bigg) \end{aligned}\]
## LR test
cox2p8Split1b <- coxph(Surv(start, trunc_yy, death_cancer) ~ sex + year8594 + agegrp + fu,
                       data = melanoma2p8Split)

anova(cox2p8Split1, cox2p8Split1b)
Analysis of Deviance Table
 Cox model: response is  Surv(start, trunc_yy, death_cancer)
 Model 1: ~ sex + year8594 + agegrp * fu
 Model 2: ~ sex + year8594 + agegrp + fu
   loglik  Chisq Df P(>|Chi|)  
1 -7787.7                      
2 -7792.7 9.8356  3   0.02002 *
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(i)

## Reparameterise the model
cox2p8Split2 <- coxph(Surv(start, trunc_yy, death_cancer) ~ sex + year8594 + fu + fu:agegrp, 
                      data = melanoma2p8Split)
tidy(cox2p8Split2)
# A tibble: 9 x 7
  term                 estimate std.error statistic   p.value conf.low conf.high
  <chr>                   <dbl>     <dbl>     <dbl>     <dbl>    <dbl>     <dbl>
1 sexFemale               0.590    0.0654     -8.06  7.58e-16    0.519     0.671
2 year8594Diagnosed 8~    0.715    0.0662     -5.07  4.08e- 7    0.628     0.814
3 fu2                    NA        0          NA    NA          NA        NA    
4 fu1:agegrp45-59         1.70     0.196       2.70  6.89e- 3    1.16      2.50 
5 fu2:agegrp45-59         1.23     0.108       1.95  5.17e- 2    0.998     1.52 
6 fu1:agegrp60-74         2.46     0.187       4.80  1.55e- 6    1.70      3.55 
7 fu2:agegrp60-74         1.70     0.105       5.09  3.59e- 7    1.39      2.09 
8 fu1:agegrp75+           5.42     0.192       8.81  1.26e-18    3.72      7.89 
9 fu2:agegrp75+           2.67     0.132       7.46  8.75e-14    2.06      3.45 

The regression equation is

\[\begin{aligned} h(t|\text{year8594,sex,agegrp,fu}) & = h_0(t)\exp\bigg(β_1I(\text{sex=``Female"}) + β_2I(\text{year8594=``Diagnosed 85-94"}) \\ & + β_3I(\text{agegrp=``45-59" \& fu=1}) + β_4I(\text{agegrp=``45-59" \& fu=2}) + β_5I(\text{agegrp=``60-74" \& fu=1}) \\ & + β_6I(\text{agegrp=``60-74" \& fu=2}) + β_7I(\text{agegrp=``75+" \& fu=1}) + β_8I(\text{agegrp=``75+" \& fu=2})\bigg) \end{aligned}\]
0�“2 years 2+ years
agegrp0-44 1.00 1.00
agegrp45-59 1.70 1.23
agegrp60-74 2.46 1.70
agegrp75+ 5.42 2.67
## Alternative approach using tt():
## https://cran.r-project.org/web/packages/survival/vignettes/timedep.pdf
cox2p8tvc2 <- coxph(Surv(trunc_yy, death_cancer) ~ sex + year8594 + agegrp +
                        tt(agegrp=="45-59") + tt(agegrp=="60-74") + tt(agegrp=="75+"),
                    data = localised,
                    tt = function(x, t, ...) x*(t>=2))
tidy(cox2p8tvc2)
# A tibble: 8 x 7
  term                  estimate std.error statistic  p.value conf.low conf.high
  <chr>                    <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl>
1 "sexFemale"              0.590    0.0654     -8.06 7.58e-16    0.519     0.671
2 "year8594Diagnosed 8~    0.715    0.0662     -5.07 4.08e- 7    0.628     0.814
3 "agegrp45-59"            1.70     0.196       2.70 6.89e- 3    1.16      2.50 
4 "agegrp60-74"            2.46     0.187       4.80 1.55e- 6    1.70      3.55 
5 "agegrp75+"              5.42     0.192       8.81 1.26e-18    3.72      7.89 
6 "tt(agegrp == \"45-5~    0.725    0.224      -1.43 1.52e- 1    0.468     1.12 
7 "tt(agegrp == \"60-7~    0.693    0.215      -1.71 8.72e- 2    0.455     1.06 
8 "tt(agegrp == \"75+\~    0.493    0.232      -3.05 2.29e- 3    0.313     0.776
cox2p8tvct <- coxph(Surv(trunc_yy, death_cancer) ~ sex + year8594 + agegrp + tt(agegrp),
                    data = localised,
                    tt = function(x, t, ...) cbind(`45-59`=(x=="45-59")*t,
                                                   `60-74`=(x=="60-74")*t,
                                                   `75+`=(x=="75+")*t))
tidy(cox2p8tvct)
# A tibble: 8 x 7
  term                  estimate std.error statistic  p.value conf.low conf.high
  <chr>                    <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl>
1 sexFemale                0.592    0.0654     -8.02 1.10e-15    0.521     0.673
2 year8594Diagnosed 85~    0.713    0.0663     -5.10 3.48e- 7    0.627     0.812
3 agegrp45-59              1.64     0.186       2.67 7.56e- 3    1.14      2.36 
4 agegrp60-74              2.70     0.179       5.57 2.52e- 8    1.91      3.84 
5 agegrp75+                6.64     0.205       9.26 2.07e-20    4.45      9.92 
6 tt(agegrp)45-59          0.949    0.0405     -1.30 1.95e- 1    0.876     1.03 
7 tt(agegrp)60-74          0.906    0.0404     -2.44 1.49e- 2    0.837     0.981
8 tt(agegrp)75+            0.806    0.0574     -3.77 1.65e- 4    0.720     0.901
lincom(cox2p8tvct, "agegrp75+", eform = TRUE)                   # t=0
          Estimate    2.5 %   97.5 %    Chisq   Pr(>Chisq)
agegrp75+ 6.644778 4.450099 9.921819 85.72268 2.070247e-20
lincom(cox2p8tvct, "agegrp75+ + tt(agegrp)75+", eform = TRUE)   # t=1
                          Estimate    2.5 %   97.5 %    Chisq   Pr(>Chisq)
agegrp75+ + tt(agegrp)75+  5.35269 3.915088 7.318171 110.5215 7.532419e-26
lincom(cox2p8tvct, "agegrp75+ + 2*tt(agegrp)75+", eform = TRUE) # t=2
                            Estimate    2.5 %   97.5 %    Chisq   Pr(>Chisq)
agegrp75+ + 2*tt(agegrp)75+  4.31185 3.373556 5.511113 136.2282 1.778638e-31
cox2p8tvclogt <- coxph(Surv(trunc_yy, death_cancer) ~ sex + year8594 + agegrp + tt(agegrp),
                       data = localised,
                       tt = function(x, t, ...) cbind(`45-59`=(x=="45-59")*log(t),
                                                      `60-74`=(x=="60-74")*log(t),
                                                      `75+`=(x=="75+")*log(t)))
tidy(cox2p8tvclogt)
# A tibble: 8 x 7
  term                  estimate std.error statistic  p.value conf.low conf.high
  <chr>                    <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl>
1 sexFemale                0.592    0.0654     -8.01 1.15e-15    0.521     0.673
2 year8594Diagnosed 85~    0.713    0.0663     -5.10 3.38e- 7    0.626     0.812
3 agegrp45-59              1.77     0.195       2.92 3.47e- 3    1.21      2.59 
4 agegrp60-74              2.77     0.185       5.51 3.51e- 8    1.93      3.99 
5 agegrp75+                6.68     0.190      10.0  1.26e-23    4.61      9.69 
6 tt(agegrp)45-59          0.789    0.143      -1.66 9.75e- 2    0.595     1.04 
7 tt(agegrp)60-74          0.708    0.138      -2.49 1.26e- 2    0.540     0.929
8 tt(agegrp)75+            0.480    0.158      -4.63 3.67e- 6    0.352     0.655
# t=0.5 => log(t)=-0.6931472
lincom(cox2p8tvclogt, "agegrp75+ - 0.6931472*tt(agegrp)75+", eform = TRUE)
                                    Estimate    2.5 %   97.5 %    Chisq
agegrp75+ - 0.6931472*tt(agegrp)75+ 11.10348 6.347921 19.42166 71.20567
                                      Pr(>Chisq)
agegrp75+ - 0.6931472*tt(agegrp)75+ 3.218622e-17
# t=1 => log(t)=0
lincom(cox2p8tvclogt, "agegrp75+", eform = TRUE)
          Estimate    2.5 %   97.5 %    Chisq   Pr(>Chisq)
agegrp75+ 6.679893 4.606969 9.685537 100.3693 1.264746e-23
# t=2 => log(t)=0.6931472
lincom(cox2p8tvclogt, "agegrp75+ + 0.6931472*tt(agegrp)75+", eform = TRUE)
                                    Estimate   2.5 %   97.5 %    Chisq
agegrp75+ + 0.6931472*tt(agegrp)75+ 4.018649 3.17166 5.091825 132.6644
                                      Pr(>Chisq)
agegrp75+ + 0.6931472*tt(agegrp)75+ 1.070656e-30

The regression equation for the cox2p8tvct model is

\[\begin{aligned} h(t|\text{year8594,sex,agegrp,fu}) & = h_0(t)\exp\bigg(β_1I(\text{sex=``Female"}) + β_2I(\text{year8594=``Diagnosed 85-94"}) \\ & + β_3I(\text{agegrp=``45-59"}) + β_4I(\text{agegrp=``60-74"}) + β_5I(\text{agegrp=``75+"}) \\ & + β_6I(\text{agegrp=``45-59"})t + β_7I(\text{agegrp=``60-74"})t + β_8I(\text{agegrp=``75+"})t\bigg) \end{aligned}\] \[\begin{aligned} \frac{h(t|\text{year8594,sex,agegrp=``75+"})} {h(t|\text{year8594,sex,agegrp=``0-44"})} &= \frac{h_0(t) \exp\bigg(β_1I(\text{sex=``Female"}) + β_2I(\text{year8594=``Diagnosed 85-94"}) + β_5 + β_8t\bigg)} {h_0(t) \exp\bigg(β_1I(\text{sex=``Female"}) + β_2I(\text{year8594=``Diagnosed 85-94"})\bigg)} \\ &= \exp(β_5 + β_8t) \end{aligned}\]

(j)

library(splines)
time.cuts <- seq(0, 10, length = 100)
delta <- diff(time.cuts)[1]

## split and collapse
melanoma2p8Split2 <- survSplit(Surv(trunc_yy,death_cancer) ~ ., data = localised,
                               cut = time.cuts, end = "tstop", start = "tstart",
                               event = "death_cancer") %>%
    mutate(fu = cut(tstop, time.cuts),
           mid = time.cuts[unclass(fu)] + delta/2) %>%
    group_by(mid, sex, year8594, agegrp) %>%
    summarise(pt = sum(tstop - tstart), death_cancer = sum(death_cancer)) %>%
    mutate(age75 = (agegrp == "75+") + 0)

poisson2p8tvc <- glm(death_cancer ~ sex + year8594 + agegrp + ns(mid, df=3) +
                         age75:ns(mid, df=3) + offset(log(pt)), 
                     data = melanoma2p8Split2, family = poisson)

## utility function to draw a confidence interval
polygon.ci <- function(time, interval, col = "lightgrey") 
    polygon(c(time, rev(time)), c(interval[,1], rev(interval[,2])), col = col, border = col)

## define exposures
newdata <- data.frame(mid = seq(0, max(time.cuts), length = 100), year8594 = "Diagnosed 85-94",
                      sex = "Male", agegrp = "75+", age75 = 1, pt = 1)

## predict IRR and 95% CI
library(rstpm2)
logirr <- rstpm2::predictnl(poisson2p8tvc, newdata = newdata,
                            fun = function(fit, newdata) predict(fit, newdata) -
                                  predict(fit, transform(newdata, agegrp = '0-44', age75 = 0)))
pred <- exp(logirr$fit)
ci <- exp(confint(logirr))

## plot
matplot(newdata$mid, ci, type = "n", 
        xlab = "Time since diagnosis (months)",
        ylab = "Rate ratio", 
        main = "Aged 75+ compared with aged 0-44 years")
polygon.ci(newdata$mid, ci) 
lines(newdata$mid, pred)