Exercise 7. Model cause-specific mortality with Poisson regression
In this exercise we model, using Poisson regression, cause-specific mortality of patients diagnosed with localised (stage==1) melanoma.
In exercise 9 we model cause-specific mortality using Cox regression and in exercise 28 we use flexible parametric models. The aim is to illustrate that these three methods are very similar.
The aim of these exercises is to explore the similarities and differences to these three approaches to modelling. We will be comparing the results (and their interpretation) as we proceed through the exercises.
Load the diet data using time-on-study as the timescale.
You may have to install the required packages the first time you use
them. You can install a package by
install.packages("package_of_interest")
for each package
you require.
library(biostat3)
library(dplyr) # for data manipulation
library(car) # for car::linearHypothesis -> biostat3::lincom
library(knitr)
library(broom)
options(pillar.sigfig = 5) # increase the number of digits printed in tibbles
Load the melanoma data and explore it.
## Read melanoma data
## Create a new dataset with only localised cancer
melanoma.l <- subset(biostat3::melanoma, stage=="Localised")
head(melanoma.l)
summary(melanoma.l)
Rates can be modelled on different timescales, e.g., attained age, time-since-entry, calendar time. Plot the CHD incidence rates both by attained age and by time-since-entry. Is there a difference? Do the same for CHD hazard by different energy intakes (hieng).
(a)
i.
## Plot Kaplan-Meier curve using survfit
## Create a new event indicator
melanoma.l <- transform(melanoma.l,
death_cancer = as.numeric(status=="Dead: cancer") )
## Create a fitted object for our subcohort
## using survfit
sfit7a1 <- survfit(Surv(surv_mm, event=death_cancer) ~ year8594,
data = melanoma.l )
## Have a look at the fitted object
str(sfit7a1, 1)
## List of 17
## $ n : int [1:2] 2145 3173
## $ time : num [1:381] 0.5 1.5 2.5 3.5 4.5 5.5 6.5 7.5 8.5 9.5 ...
## $ n.risk : num [1:381] 2145 2143 2140 2138 2135 ...
## $ n.event : num [1:381] 0 3 0 2 2 0 4 5 4 5 ...
## $ n.censor : num [1:381] 2 0 2 1 3 2 3 2 5 3 ...
## $ surv : num [1:381] 1 0.999 0.999 0.998 0.997 ...
## $ std.err : num [1:381] 0 0.000809 0.000809 0.001045 0.001237 ...
## $ cumhaz : num [1:381] 0 0.0014 0.0014 0.00234 0.00327 ...
## $ std.chaz : num [1:381] 0 0.000808 0.000808 0.001044 0.001237 ...
## $ strata : Named int [1:2] 249 132
## ..- attr(*, "names")= chr [1:2] "year8594=Diagnosed 75-84" "year8594=Diagnosed 85-94"
## $ type : chr "right"
## $ logse : logi TRUE
## $ conf.int : num 0.95
## $ conf.type: chr "log"
## $ lower : num [1:381] 1 0.997 0.997 0.996 0.994 ...
## $ upper : num [1:381] 1 1 1 1 0.999 ...
## $ call : language survfit(formula = Surv(surv_mm, event = death_cancer) ~ year8594, data = melanoma.l)
## - attr(*, "class")= chr "survfit"
## Plot the survival curve (with some bells and whistles)
plot(sfit7a1,
## No automatic labelling of the curve (we do that ourselves)
mark.time=FALSE,
## Time is measured in months, but we want to see it in years
xscale=12,
## Make the plot prettier
xlab="Years since diagnosis",
ylab="S(t)",
col=c("blue","red"),
lty=c("solid","dashed"))
## Add legend too
legend("bottomleft",legend=levels(melanoma.l$year8594),col=c("blue","red"),
lty=c("solid","dashed"), bty="n")
### TRY IF YOU WANT ###
if (FALSE) {
library(survMisc)
## Note: `autoplot(sfit7a1)` was broken; I have submitted a pull request to fix this
## autoplot(sfit7a1)
## alternatively:
autoplot(sfit7a1, timeTicks = "custom", times= seq(0, 20*12, 5*12))
}
Survival is better during the latter period.
ii.
## To plot smoothed hazards, we use the bshazard package
library(bshazard)
par(mfrow=1:2)
for(level in levels(melanoma.l$year8594))
plot(bshazard(Surv(surv_mm/12, status == "Dead: cancer") ~ 1,
data=subset(melanoma.l, year8594==level)),
xlab="Years since diagnosis", main=level, xlim=c(0,20), ylim=c(0,0.08))
## Iterations: relative error in phi-hat = 1e-04
## phi= 0.7852059 sv2= 0.1031122 df= 11.13992 lambda= 7.615065
## phi= 0.780798 sv2= 0.1121082 df= 11.82055 lambda= 6.964684
## phi= 0.7797268 sv2= 0.1150673 df= 12.05156 lambda= 6.77627
## phi= 0.7794322 sv2= 0.1159797 df= 12.12331 lambda= 6.720417
## phi= 0.7793464 sv2= 0.1162555 df= 12.14504 lambda= 6.703737
## phi= 0.7793209 sv2= 0.1163384 df= 12.15157 lambda= 6.698744
## phi= 0.7793133 sv2= 0.1163632 df= 12.15353 lambda= 6.697248
## phi= 0.779311 sv2= 0.1163707 df= 12.15412 lambda= 6.6968
## Iterations: relative error in phi-hat = 1e-04
## phi= 0.803423 sv2= 0.03335213 df= 10.9523 lambda= 24.08911
## phi= 0.8115022 sv2= 0.02378285 df= 8.978924 lambda= 34.12132
## phi= 0.8155906 sv2= 0.02093603 df= 8.30537 lambda= 38.95631
## phi= 0.8174496 sv2= 0.01991585 df= 8.063728 lambda= 41.04518
## phi= 0.8182356 sv2= 0.01952194 df= 7.97063 lambda= 41.91364
## phi= 0.8185595 sv2= 0.0193653 df= 7.933646 lambda= 42.2694
## phi= 0.8186918 sv2= 0.01930227 df= 7.918771 lambda= 42.41428
## phi= 0.8187455 sv2= 0.01927679 df= 7.912758 lambda= 42.47313
## phi= 0.8187674 sv2= 0.01926647 df= 7.910323 lambda= 42.49702
## phi= 0.8187762 sv2= 0.01926228 df= 7.909335 lambda= 42.50671
## phi= 0.8187798 sv2= 0.01926058 df= 7.908935 lambda= 42.51064
Mortality is lower during the latter period.
iii.
## Compare with Kaplan-Meier plot
layout(matrix(c(1,1,2,3), 2, 2, byrow = TRUE))
plot(sfit7a1,
## No automatic labelling of the curve (we do that ourselves)
mark.time=FALSE,
## Time is measured in months, but we want to see it in years
xscale=12,
ylim=c(0.6,1),
## Make the plot prettier
xlab="Years since diagnosis",
ylab="S(t)",
col=c("blue","red"),
lty=c("solid","dashed"))
legend("bottomleft",legend=levels(melanoma.l$year8594),
col=c("blue","red"),lty=c("solid","dashed"), bty="n")
cols = c("Diagnosed 75-84"="blue", "Diagnosed 85-94"="red")
ltys = c("Diagnosed 75-84"="solid", "Diagnosed 85-94"="dashed")
for(level in levels(melanoma.l$year8594))
plot(bshazard(Surv(surv_mm/12, status == "Dead: cancer") ~ 1,
data=subset(melanoma.l, year8594==level),
verbose=FALSE),
xlab="Years since diagnosis", main=level,
xlim=c(0,22), ylim=c(0,0.08), col=cols[level],
lty=ltys[level])
The three graphs both show that prognosis is better during the latter period. Patients diagnosed during the latter period have lower mortality and higher survival.
(b)
year8594 | tstop | event | rate | lower | upper | |
---|---|---|---|---|---|---|
year8594=Diagnosed 75-84 | Diagnosed 75-84 | 22662.79 | 572 | 0.0252396 | 0.0232133 | 0.0273954 |
year8594=Diagnosed 85-94 | Diagnosed 85-94 | 15963.79 | 441 | 0.0276250 | 0.0251066 | 0.0303277 |
The estimated mortality rate is lower for patients diagnosed during the early period. This is not consistent with what we saw in previous analyses. The inconsistency is due to the fact that we have not controlled for time since diagnosis. look at the graph of the estimated hazards (on the previous page) and try and estimate the overall average value for each group. We see that the average hazard for patients diagnosed in the early period is drawn down by the low mortality experienced by patients 10 years subsequent to diagnosis.
(c)
i.
## Calculate the incidence rate by time of diagnosis
## but with new variables
melanoma.l2 <-
transform(melanoma.l,
## Update the death indicator (only count deaths within 120 months)
## death_cancer = death_cancer * as.numeric(surv_mm<=120),
death_cancer = ifelse(surv_mm<=120 & status == "Dead: cancer",1,0),
## Create a new time variable
surv_mm = pmin(surv_mm,120))
## Calculate the rates on the truncated data
rates_by_diag_yr2 <- survRate(Surv(surv_mm, death_cancer) ~ year8594, data=melanoma.l2)
rates_by_diag_yr2 |> kable("html")
year8594 | tstop | event | rate | lower | upper | |
---|---|---|---|---|---|---|
year8594=Diagnosed 75-84 | Diagnosed 75-84 | 198012.5 | 519 | 0.0026210 | 0.0024004 | 0.0028566 |
year8594=Diagnosed 85-94 | Diagnosed 85-94 | 190507.5 | 441 | 0.0023149 | 0.0021038 | 0.0025413 |
Now that we have restricted follow-up to a maximum of 10 years we see that the average mortality rate for patients diagnosed in the early period is higher than for the latter period. This is consistent with the graphs we examined in part (a).
ii.
## [1] 0.8831852
##
## Comparison of Poisson rates
##
## data: event time base: tstop
## count1 = 441, expected count1 = 470.73, p-value = 0.05682
## alternative hypothesis: true rate ratio is not equal to 1
## 95 percent confidence interval:
## 0.7761294 1.0046616
## sample estimates:
## rate ratio
## 0.8831852
iii.
## Use glm to estimate the rate ratios
## we scale the offset term to 1000 person-years
poisson7c <- glm(death_cancer ~ year8594 + offset(log(surv_mm/12/1000)),
family=poisson, data=melanoma.l2)
summary(poisson7c)
##
## Call:
## glm(formula = death_cancer ~ year8594 + offset(log(surv_mm/12/1000)),
## family = poisson, data = melanoma.l2)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.44848 0.04389 78.566 <2e-16 ***
## year8594Diagnosed 85-94 -0.12422 0.06476 -1.918 0.0551 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 4814.3 on 5317 degrees of freedom
## Residual deviance: 4810.6 on 5316 degrees of freedom
## AIC: 6734.6
##
## Number of Fisher Scoring iterations: 6
## also for collapsed data
summary(glm(event ~ year8594 + offset(log(tstop/12/1000)), family=poisson,
data=rates_by_diag_yr2))
##
## Call:
## glm(formula = event ~ year8594 + offset(log(tstop/12/1000)),
## family = poisson, data = rates_by_diag_yr2)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.44848 0.04390 78.562 <2e-16 ***
## year8594Diagnosed 85-94 -0.12422 0.06476 -1.918 0.0551 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 3.6890e+00 on 1 degrees of freedom
## Residual deviance: 1.5543e-14 on 0 degrees of freedom
## AIC: 20.017
##
## Number of Fisher Scoring iterations: 2
## # A tibble: 2 × 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 31.453 0.043893 78.566 0 28.824 34.237
## 2 year8594Diagnosed 85… 0.88319 0.064762 -1.9181 0.055096 0.77770 1.0025
## exp(beta) 2.5 % 97.5 %
## (Intercept) 31.4525600 28.859880 34.278158
## year8594Diagnosed 85-94 0.8831851 0.777905 1.002714
## Note that the scaling of the offset term only has an impact on the intercept
summary(glm(death_cancer ~ year8594 + offset(log(surv_mm)),
family=poisson, data=melanoma.l2))
##
## Call:
## glm(formula = death_cancer ~ year8594 + offset(log(surv_mm)),
## family = poisson, data = melanoma.l2)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.94418 0.04389 -135.426 <2e-16 ***
## year8594Diagnosed 85-94 -0.12422 0.06476 -1.918 0.0551 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 4814.3 on 5317 degrees of freedom
## Residual deviance: 4810.6 on 5316 degrees of freedom
## AIC: 6734.6
##
## Number of Fisher Scoring iterations: 6
We see that Poisson regression is estimating the mortality rate ratio which, in this simple example, is the ratio of the two mortality rates.
The regression equation is:
\[\begin{align*} E(\text{death_cancer}) &= \frac{\text{surv_mm}}{12 \times 1000}\exp\left(\beta_0 + \beta_1 I(\text{year8594}="\text{Diagnosed 85-94}")\right) \\ &= \exp\left(\beta_0 + \beta_1 I(\text{year8594}="\text{Diagnosed 85-94}") + \log(\text{surv_mm}/1000/12) \right) \end{align*}\]
where \(E(\text{death_cancer})\) is the expected number of cancer deaths, \(\beta_0\) is the intercept term for the log rate, \(\beta_1\) is the log rate ratio for the later calendar period, and \(\text{surv_mm}/1000/12\) is the period-time.
(d)
In order to adjust for time since diagnosis (i.e., adjust for the fact that we expect mortality to depend on time since diagnosis) we need to split the data by this timescale. We will restrict our analysis to mortality up to 10 years following diagnosis.
## Add a new variable for year
melanoma.l2 <- transform(melanoma.l2, surv_yy1 = surv_mm/12)
## Split follow up by year
melanoma.spl <- survSplit(melanoma.l2, cut=0:9, end="surv_yy1", start="start",
event="death_cancer")
## Calculate persontime and
## recode start time as a factor
melanoma.spl <- transform(melanoma.spl,
pt = surv_yy1 - start,
fu = as.factor(start) )
(e)
## Calculate the incidence rate by observation year
yearly_rates <- survRate(Surv(pt/1000,death_cancer)~fu, data=melanoma.spl) |>
transform(start=as.numeric(levels(fu))[fu]) |>
transform(mid=start+0.5)
library(tinyplot) # lightweight base graphics extension
with(yearly_rates, {
plt(rate~start, ymin=lower, ymax=upper, type="ribbon",
main="Cancer deaths by years since diagnosis",
ylab="Incidence rate per 1000 person-years",
xlab="Years since diagnosis")
})
It seems reasonable (at least to me) that melanoma-specific mortality is lower during the first year. These patients were classified as having localised skin melanoma at the time of diagnosis. That is, there was no evidence of metastases at the time of diagnosis although many of the patients who died would have had undetectable metastases or micrometastases at the time of diagnosis. It appears that it takes at least one year for these initially undetectable metastases to progress and cause the death of the patient.
(f)
# Plot smoothed hazards
library(bshazard)
library(tinyplot)
par(mfrow=1:2)
survRate(Surv(pt,death_cancer)~fu, data=melanoma.spl) |>
transform(start=as.numeric(levels(fu))[fu]) |>
transform(mid=start+0.5) |>
with({
plt(rate~mid, ymin=lower, ymax=upper,
type="ribbon",
main="Rates",
ylab="Mortality rate per person-year",
xlab="Years since diagnosis",
xlim=c(0,10))
})
bshazard(Surv(surv_mm/12, status == "Dead: cancer") ~ 1, data = melanoma.l) |>
plot(xlab="Years since diagnosis", xlim=c(0,10),
main="Smoothed hazard")
## Iterations: relative error in phi-hat = 1e-04
## phi= 0.7977696 sv2= 0.1010981 df= 12.03631 lambda= 7.891047
## phi= 0.7933826 sv2= 0.1080883 df= 12.65859 lambda= 7.340133
## phi= 0.7922988 sv2= 0.1102665 df= 12.85334 lambda= 7.185309
## phi= 0.7920014 sv2= 0.1109128 df= 12.9111 lambda= 7.14076
## phi= 0.7919165 sv2= 0.1111017 df= 12.92799 lambda= 7.12785
## phi= 0.791892 sv2= 0.1111568 df= 12.9329 lambda= 7.124101
## phi= 0.7918848 sv2= 0.1111727 df= 12.93433 lambda= 7.123012
## phi= 0.7918828 sv2= 0.1111774 df= 12.93475 lambda= 7.122695
The pattern is similar. The plot of the mortality rates could be considered an approximation to the ‘true’ functional form depicted in the hazard plot. By estimating the rates for each year of follow-up we are essentially approximating the hazard using a step function. It would probably be more informative to use narrower intervals (e.g., 6-month intervals) for the first 6 months of follow-up.
Note that I have used the tinyplot
package for the
ribbon plot, which is more lightweight than the ggplot2
package.
(g)
## Run Poisson regression
summary(poisson7g <- glm(death_cancer ~ fu + offset(log(pt)),
family = poisson,
data = melanoma.spl))
##
## Call:
## glm(formula = death_cancer ~ fu + offset(log(pt)), family = poisson,
## data = melanoma.spl)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.3046 0.1187 -36.276 < 2e-16 ***
## fu1 1.2456 0.1359 9.166 < 2e-16 ***
## fu2 1.2617 0.1380 9.145 < 2e-16 ***
## fu3 1.0127 0.1460 6.934 4.08e-12 ***
## fu4 0.8186 0.1552 5.275 1.33e-07 ***
## fu5 0.7265 0.1630 4.456 8.36e-06 ***
## fu6 0.4961 0.1787 2.776 0.00551 **
## fu7 0.1682 0.2065 0.815 0.41531
## fu8 0.2886 0.2085 1.384 0.16641
## fu9 -0.2899 0.2765 -1.048 0.29452
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 8651.5 on 34308 degrees of freedom
## Residual deviance: 8446.4 on 34299 degrees of freedom
## AIC: 10386
##
## Number of Fisher Scoring iterations: 6
## exp(beta) 2.5 % 97.5 %
## (Intercept) 0.0135058 0.01070318 0.01704229
## fu1 3.4750768 2.66250895 4.53563117
## fu2 3.5312671 2.69465247 4.62762741
## fu3 2.7529574 2.06771439 3.66529071
## fu4 2.2673515 1.67274409 3.07332288
## fu5 2.0677380 1.50217318 2.84623672
## fu6 1.6422607 1.15697948 2.33108731
## fu7 1.1831887 0.78936945 1.77348567
## fu8 1.3345367 0.88679255 2.00834805
## fu9 0.7483554 0.43523195 1.28675268
The pattern of the estimated mortality rate ratios mirrors the
pattern we saw in the plot of the rates. Note that the first year of
follow-up is the reference so the estimated rate ratio labelled
1
for fu
is the rate ratio for the second year
compared to the first year.
The regression equation is:
\[\begin{align*} E(\text{death_cancer}) &= \text{pt}\exp\left(\beta_0 + \beta_1 I(\text{fu}=1) + \beta_2 I(\text{fu}=2) + \beta_3 I(\text{fu}=3) + \beta_4 I(\text{fu}=4) + \beta_5 I(\text{fu}=5) + \beta_6 I(\text{fu}=6) + \beta_7 I(\text{fu}=7) + \beta_8 I(\text{fu}=8) + \beta_9 I(\text{fu}=9)\right) \\ &= \exp\left(\beta_0 + \beta_1 I(\text{fu}=1) + \beta_2 I(\text{fu}=2) + \beta_3 I(\text{fu}=3) + \beta_4 I(\text{fu}=4) + \beta_5 I(\text{fu}=5) + \beta_6 I(\text{fu}=6) + \beta_7 I(\text{fu}=7) + \beta_8 I(\text{fu}=8) + \beta_9 I(\text{fu}=9) + \log(\text{pt})\right) \end{align*}\]
(h)
summary(poisson7h <- glm( death_cancer ~ fu + year8594 + offset( log(pt) ),
family = poisson,
data = melanoma.spl ))
##
## Call:
## glm(formula = death_cancer ~ fu + year8594 + offset(log(pt)),
## family = poisson, data = melanoma.spl)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.16612 0.12380 -33.651 < 2e-16 ***
## fu1 1.24352 0.13589 9.151 < 2e-16 ***
## fu2 1.25370 0.13797 9.087 < 2e-16 ***
## fu3 0.99738 0.14610 6.827 8.68e-12 ***
## fu4 0.79438 0.15532 5.115 3.14e-07 ***
## fu5 0.69247 0.16329 4.241 2.23e-05 ***
## fu6 0.45104 0.17911 2.518 0.011796 *
## fu7 0.10844 0.20710 0.524 0.600559
## fu8 0.21049 0.20954 1.004 0.315139
## fu9 -0.39239 0.27780 -1.413 0.157802
## year8594Diagnosed 85-94 -0.24444 0.06579 -3.715 0.000203 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 8651.5 on 34308 degrees of freedom
## Residual deviance: 8432.6 on 34298 degrees of freedom
## AIC: 10375
##
## Number of Fisher Scoring iterations: 6
## exp(beta) 2.5 % 97.5 %
## (Intercept) 0.01551228 0.01217011 0.01977229
## fu1 3.46780081 2.65693069 4.52614081
## fu2 3.50326901 2.67320128 4.59108479
## fu3 2.71116147 2.03608968 3.61005538
## fu4 2.21306295 1.63225462 3.00054142
## fu5 1.99864158 1.45125272 2.75249659
## fu6 1.56993640 1.10515372 2.23018778
## fu7 1.11453748 0.74268832 1.67256405
## fu8 1.23427730 0.81855780 1.86112752
## fu9 0.67543732 0.39185040 1.16425955
## year8594Diagnosed 85-94 0.78314061 0.68839639 0.89092450
# Add interaction term
summary(poisson7h2 <- glm(death_cancer ~ fu*year8594 + offset(log(pt)),
family=poisson, data=melanoma.spl))
##
## Call:
## glm(formula = death_cancer ~ fu * year8594 + offset(log(pt)),
## family = poisson, data = melanoma.spl)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.16333 0.17408 -23.917 < 2e-16 ***
## fu1 1.28028 0.19807 6.464 1.02e-10 ***
## fu2 1.39664 0.19729 7.079 1.45e-12 ***
## fu3 0.81978 0.21609 3.794 0.000148 ***
## fu4 0.65306 0.22519 2.900 0.003732 **
## fu5 0.66513 0.22711 2.929 0.003404 **
## fu6 0.41495 0.24264 1.710 0.087240 .
## fu7 0.03655 0.27163 0.135 0.892966
## fu8 0.30339 0.25453 1.192 0.233279
## fu9 -0.38675 0.31895 -1.213 0.225302
## year8594Diagnosed 85-94 -0.24965 0.23795 -1.049 0.294089
## fu1:year8594Diagnosed 85-94 -0.07110 0.27234 -0.261 0.794025
## fu2:year8594Diagnosed 85-94 -0.30852 0.27725 -1.113 0.265806
## fu3:year8594Diagnosed 85-94 0.34410 0.29326 1.173 0.240642
## fu4:year8594Diagnosed 85-94 0.29777 0.31086 0.958 0.338115
## fu5:year8594Diagnosed 85-94 0.06578 0.32894 0.200 0.841494
## fu6:year8594Diagnosed 85-94 0.09658 0.36443 0.265 0.790988
## fu7:year8594Diagnosed 85-94 0.22179 0.42829 0.518 0.604555
## fu8:year8594Diagnosed 85-94 -0.51530 0.53954 -0.955 0.339542
## fu9:year8594Diagnosed 85-94 -0.06040 0.79245 -0.076 0.939240
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 8651.5 on 34308 degrees of freedom
## Residual deviance: 8419.4 on 34289 degrees of freedom
## AIC: 10379
##
## Number of Fisher Scoring iterations: 7
## exp(beta) 2.5 % 97.5 %
## (Intercept) 0.01555564 0.01105892 0.0218808
## fu1 3.59765631 2.44018328 5.3041634
## fu2 4.04160662 2.74548881 5.9496087
## fu3 2.26998933 1.48622497 3.4670737
## fu4 1.92140436 1.23576375 2.9874599
## fu5 1.94474961 1.24608035 3.0351582
## fu6 1.51429324 0.94118270 2.4363857
## fu7 1.03722422 0.60906442 1.7663716
## fu8 1.35444106 0.82243832 2.2305753
## fu9 0.67926283 0.36353299 1.2692053
## year8594Diagnosed 85-94 0.77907144 0.48869327 1.2419903
## fu1:year8594Diagnosed 85-94 0.93136483 0.54614148 1.5883072
## fu2:year8594Diagnosed 85-94 0.73453606 0.42659693 1.2647612
## fu3:year8594Diagnosed 85-94 1.41072301 0.79400185 2.5064670
## fu4:year8594Diagnosed 85-94 1.34685580 0.73234442 2.4770047
## fu5:year8594Diagnosed 85-94 1.06799324 0.56050340 2.0349735
## fu6:year8594Diagnosed 85-94 1.10140218 0.53919080 2.2498284
## fu7:year8594Diagnosed 85-94 1.24831405 0.53921836 2.8899016
## fu8:year8594Diagnosed 85-94 0.59732398 0.20746996 1.7197475
## fu9:year8594Diagnosed 85-94 0.94138428 0.19917538 4.4493671
The estimated mortality rate ratio is \(0.7791\) compared to \(0.8832\) (part c) and a value greater than 1 in part (b). The estimate we obtained in part (b) was subject to confounding by time-since-diagnosis. In part (c) we restricted to the first 10 years of follow-up subsequent to diagnosis. This did not, however, completely remove the confounding effect of time since diagnosis. There was still some confounding within the first 10 years of follow-up (if this is not clear to you then look in the data to see if there are associations between the confounder and the exposure and the confounder and the outcome) so the estimate was subject to residual confounding. Now, when we adjust for time since diagnosis we see that the estimate changes further.
(i)
Now control for age, sex, and calendar period. Write out the regression equation.
The regression equation is: \[\begin{align*} E(\text{death_cancer}) &= \text{pt}\exp\left(\beta_0 + \beta_1 I(\text{fu}=1) + \beta_2 I(\text{fu}=2) + \beta_3 I(\text{fu}=3) + \beta_4 I(\text{fu}=4) + \beta_5 I(\text{fu}=5) + \beta_6 I(\text{fu}=6) + \beta_7 I(\text{fu}=7) + \beta_8 I(\text{fu}=8) + \beta_9 I(\text{fu}=9) + \right. \\ &\qquad \left.\beta_{10} x + \beta_{11} I(\text{fu}=1) x + \beta_{12} I(\text{fu}=2) x + \beta_{13} I(\text{fu}=3) x + \beta_{14} I(\text{fu}=4) x + \beta_{15} I(\text{fu}=5) x + \beta_{16} I(\text{fu}=6) x + \beta_{17} I(\text{fu}=7) x + \beta_{18} I(\text{fu}=8) x + \beta_{19} I(\text{fu}=9) x \right) \end{align*}\] where \(x\) is the indicator variable when year8594=“Diagnosed 85-94”.
i.
For patients of the same sex diagnosed in the same calendar period, those aged 60–74 at diagnosis have an estimated 86% higher risk of death due to skin melanoma than those aged 0–44 at diagnosis. The difference is statistically significant.
ii.
The parameter estimate for period changes from 0.78 to 0.72 when age and sex are added to the model. Whether this is ‘strong confounding’, or even ‘confounding’ is a matter of judgement. I would consider this confounding but not strong confounding but there is no correct answer.
iii.
summary(poisson7i <- glm(death_cancer ~ fu + year8594 + sex + agegrp + offset(log(pt)),
family=poisson, data=melanoma.spl))
##
## Call:
## glm(formula = death_cancer ~ fu + year8594 + sex + agegrp + offset(log(pt)),
## family = poisson, data = melanoma.spl)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.36681 0.14322 -30.490 < 2e-16 ***
## fu1 1.26827 0.13592 9.331 < 2e-16 ***
## fu2 1.30657 0.13806 9.464 < 2e-16 ***
## fu3 1.07575 0.14627 7.354 1.92e-13 ***
## fu4 0.89517 0.15559 5.753 8.75e-09 ***
## fu5 0.81370 0.16368 4.971 6.65e-07 ***
## fu6 0.58637 0.17957 3.265 0.00109 **
## fu7 0.25361 0.20758 1.222 0.22181
## fu8 0.36427 0.21006 1.734 0.08290 .
## fu9 -0.22796 0.27844 -0.819 0.41296
## year8594Diagnosed 85-94 -0.32516 0.06618 -4.913 8.97e-07 ***
## sexFemale -0.53180 0.06545 -8.125 4.48e-16 ***
## agegrp45-59 0.28352 0.09417 3.011 0.00261 **
## agegrp60-74 0.62185 0.09088 6.843 7.76e-12 ***
## agegrp75+ 1.22386 0.10444 11.718 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 8651.5 on 34308 degrees of freedom
## Residual deviance: 8233.4 on 34294 degrees of freedom
## AIC: 10183
##
## Number of Fisher Scoring iterations: 7
## exp(beta) 2.5 % 97.5 %
## (Intercept) 0.01269168 0.009585413 0.01680457
## fu1 3.55468470 2.723340923 4.63980959
## fu2 3.69349752 2.817870250 4.84121792
## fu3 2.93219656 2.201336558 3.90570748
## fu4 2.44775331 1.804376456 3.32053559
## fu5 2.25623262 1.637030417 3.10964634
## fu6 1.79745329 1.264170006 2.55569926
## fu7 1.28866663 0.857919555 1.93568462
## fu8 1.43945962 0.953660962 2.17272602
## fu9 0.79615726 0.461304916 1.37407245
## year8594Diagnosed 85-94 0.72241051 0.634523266 0.82247095
## sexFemale 0.58754651 0.516807578 0.66796796
## agegrp45-59 1.32779475 1.104004888 1.59694845
## agegrp60-74 1.86237635 1.558526802 2.22546423
## agegrp75+ 3.40028687 2.770846371 4.17271449
## Test if the effect of age is significant using a likelihood ratio test
drop1(poisson7i, ~agegrp, test="Chisq")
## Single term deletions
##
## Model:
## death_cancer ~ fu + year8594 + sex + agegrp + offset(log(pt))
## Df Deviance AIC LRT Pr(>Chi)
## <none> 8233.4 10183
## agegrp 3 8377.9 10322 144.59 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## For this we can also use the car package and a Wald test
linearHypothesis(poisson7i,c("agegrp45-59 = 0","agegrp60-74 = 0","agegrp75+ = 0"))
## Linear hypothesis test
##
## Hypothesis:
## agegrp45-59 = 0
## agegrp60-74 = 0
## agegrp75+ = 0
##
## Model 1: restricted model
## Model 2: death_cancer ~ fu + year8594 + sex + agegrp + offset(log(pt))
##
## Res.Df Df Chisq Pr(>Chisq)
## 1 34297
## 2 34294 3 155.82 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## ADVANCED:
## Alternative approach for the likelihood ratio test
# poisson7i_2 <- update(poisson7i,. ~ . - agegrp)
# anova(poisson7i_2,poisson7i,test="Chisq")
Age (modelled as a categorical variable with 4 levels) is highly significant in the model.
(j)
summary(poisson7j <- glm(death_cancer ~ fu + agegrp + year8594*sex + offset(log(pt)),
family=poisson, data=melanoma.spl))
##
## Call:
## glm(formula = death_cancer ~ fu + agegrp + year8594 * sex + offset(log(pt)),
## family = poisson, data = melanoma.spl)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.37900 0.14596 -30.001 < 2e-16 ***
## fu1 1.26830 0.13592 9.331 < 2e-16 ***
## fu2 1.30659 0.13806 9.464 < 2e-16 ***
## fu3 1.07569 0.14627 7.354 1.92e-13 ***
## fu4 0.89511 0.15559 5.753 8.77e-09 ***
## fu5 0.81360 0.16369 4.971 6.68e-07 ***
## fu6 0.58630 0.17958 3.265 0.001095 **
## fu7 0.25340 0.20759 1.221 0.222197
## fu8 0.36405 0.21007 1.733 0.083090 .
## fu9 -0.22829 0.27845 -0.820 0.412297
## agegrp45-59 0.28270 0.09419 3.001 0.002688 **
## agegrp60-74 0.62118 0.09089 6.835 8.23e-12 ***
## agegrp75+ 1.22364 0.10444 11.717 < 2e-16 ***
## year8594Diagnosed 85-94 -0.29917 0.08840 -3.384 0.000714 ***
## sexFemale -0.50562 0.08813 -5.737 9.64e-09 ***
## year8594Diagnosed 85-94:sexFemale -0.05792 0.13061 -0.443 0.657440
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 8651.5 on 34308 degrees of freedom
## Residual deviance: 8233.2 on 34293 degrees of freedom
## AIC: 10185
##
## Number of Fisher Scoring iterations: 7
## exp(beta) 2.5 % 97.5 %
## (Intercept) 0.01253791 0.009418543 0.01669039
## fu1 3.55479530 2.723425214 4.63995469
## fu2 3.69354707 2.817905687 4.84128693
## fu3 2.93201254 2.201195141 3.90546816
## fu4 2.44760423 1.804262241 3.32034133
## fu5 2.25601958 1.636868208 3.10936723
## fu6 1.79732528 1.264071466 2.55553445
## fu7 1.28840069 0.857735550 1.93530086
## fu8 1.43915154 0.953447823 2.17228159
## fu9 0.79589580 0.461149435 1.37363308
## agegrp45-59 1.32670920 1.103059120 1.59570531
## agegrp60-74 1.86113111 1.557443264 2.22403543
## agegrp75+ 3.39953913 2.770276830 4.17173697
## year8594Diagnosed 85-94 0.74143513 0.623488755 0.88169360
## sexFemale 0.60313385 0.507452579 0.71685602
## year8594Diagnosed 85-94:sexFemale 0.94372451 0.730577239 1.21905789
The interaction term is not statistically significant indicating that there is no evidence that the effect of sex is modified by period.
(k)
Based on the interaction model you fitted in exercise 7j, estimate the hazard ratio for the effect of sex (with 95% confidence interval) for each calendar period.
ADVANCED: Do this with each of the following methods and confirm that the results are the same:
i.
## sexFemale
## 0.6031338
## sexFemale
## 0.5691922
The effect of sex for patients diagnosed 1975–84 is \(0.6031338\) and the effect of sex for patients diagnosed 1985–94 is \(0.6031338 \times 0.9437245=0.56919214\).
ii.
We can use lincom
to get the estimated effect for
patients diagnosed 1985–94.
## You will need the "car" package to use lincom. If it is not already installed:
## install.packages("car")
lincom(poisson7j,c("sexFemale + year8594Diagnosed 85-94:sexFemale"),eform=TRUE)
## Estimate 2.5 % 97.5 %
## sexFemale + year8594Diagnosed 85-94:sexFemale 0.5691922 0.4705541 0.6885069
## Chisq Pr(>Chisq)
## sexFemale + year8594Diagnosed 85-94:sexFemale 33.68456 6.481293e-09
The advantage of lincom
is that we also get a confidence
interval (not easy to calculate by hand since the SE is a function of
variances and covariances).
iii.
## Create dummies and Poisson regression
melanoma.spl <- melanoma.spl |>
## Add confidence intervals for the rates
transform(femaleEarly = sex=="Female" & year8594=="Diagnosed 75-84",
femaleLate = sex=="Female" & year8594=="Diagnosed 85-94")
summary(poisson7k <- glm( death_cancer ~ fu + agegrp + year8594 + femaleEarly +
femaleLate + offset( log(pt) ), family=poisson,
data=melanoma.spl ))
##
## Call:
## glm(formula = death_cancer ~ fu + agegrp + year8594 + femaleEarly +
## femaleLate + offset(log(pt)), family = poisson, data = melanoma.spl)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.37900 0.14596 -30.001 < 2e-16 ***
## fu1 1.26830 0.13592 9.331 < 2e-16 ***
## fu2 1.30659 0.13806 9.464 < 2e-16 ***
## fu3 1.07569 0.14627 7.354 1.92e-13 ***
## fu4 0.89511 0.15559 5.753 8.77e-09 ***
## fu5 0.81360 0.16369 4.971 6.68e-07 ***
## fu6 0.58630 0.17958 3.265 0.001095 **
## fu7 0.25340 0.20759 1.221 0.222197
## fu8 0.36405 0.21007 1.733 0.083090 .
## fu9 -0.22829 0.27845 -0.820 0.412297
## agegrp45-59 0.28270 0.09419 3.001 0.002688 **
## agegrp60-74 0.62118 0.09089 6.835 8.23e-12 ***
## agegrp75+ 1.22364 0.10444 11.717 < 2e-16 ***
## year8594Diagnosed 85-94 -0.29917 0.08840 -3.384 0.000714 ***
## femaleEarlyTRUE -0.50562 0.08813 -5.737 9.64e-09 ***
## femaleLateTRUE -0.56354 0.09710 -5.804 6.48e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 8651.5 on 34308 degrees of freedom
## Residual deviance: 8233.2 on 34293 degrees of freedom
## AIC: 10185
##
## Number of Fisher Scoring iterations: 7
## exp(beta) 2.5 % 97.5 %
## (Intercept) 0.01253791 0.009418543 0.01669039
## fu1 3.55479530 2.723425214 4.63995469
## fu2 3.69354707 2.817905687 4.84128693
## fu3 2.93201254 2.201195141 3.90546816
## fu4 2.44760423 1.804262241 3.32034133
## fu5 2.25601958 1.636868208 3.10936723
## fu6 1.79732528 1.264071466 2.55553445
## fu7 1.28840069 0.857735550 1.93530086
## fu8 1.43915154 0.953447823 2.17228159
## fu9 0.79589580 0.461149435 1.37363308
## agegrp45-59 1.32670920 1.103059120 1.59570531
## agegrp60-74 1.86113111 1.557443264 2.22403543
## agegrp75+ 3.39953913 2.770276830 4.17173697
## year8594Diagnosed 85-94 0.74143513 0.623488755 0.88169360
## femaleEarlyTRUE 0.60313385 0.507452579 0.71685602
## femaleLateTRUE 0.56919219 0.470554120 0.68850689
iv.
## Add interaction term
summary(poisson7k2 <- glm( death_cancer ~ fu + agegrp + year8594 + year8594:sex +
offset( log(pt) ), family=poisson,
data=melanoma.spl ))
##
## Call:
## glm(formula = death_cancer ~ fu + agegrp + year8594 + year8594:sex +
## offset(log(pt)), family = poisson, data = melanoma.spl)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.37900 0.14596 -30.001 < 2e-16 ***
## fu1 1.26830 0.13592 9.331 < 2e-16 ***
## fu2 1.30659 0.13806 9.464 < 2e-16 ***
## fu3 1.07569 0.14627 7.354 1.92e-13 ***
## fu4 0.89511 0.15559 5.753 8.77e-09 ***
## fu5 0.81360 0.16369 4.971 6.68e-07 ***
## fu6 0.58630 0.17958 3.265 0.001095 **
## fu7 0.25340 0.20759 1.221 0.222197
## fu8 0.36405 0.21007 1.733 0.083090 .
## fu9 -0.22829 0.27845 -0.820 0.412297
## agegrp45-59 0.28270 0.09419 3.001 0.002688 **
## agegrp60-74 0.62118 0.09089 6.835 8.23e-12 ***
## agegrp75+ 1.22364 0.10444 11.717 < 2e-16 ***
## year8594Diagnosed 85-94 -0.29917 0.08840 -3.384 0.000714 ***
## year8594Diagnosed 75-84:sexFemale -0.50562 0.08813 -5.737 9.64e-09 ***
## year8594Diagnosed 85-94:sexFemale -0.56354 0.09710 -5.804 6.48e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 8651.5 on 34308 degrees of freedom
## Residual deviance: 8233.2 on 34293 degrees of freedom
## AIC: 10185
##
## Number of Fisher Scoring iterations: 7
## exp(beta) 2.5 % 97.5 %
## (Intercept) 0.01253791 0.009418543 0.01669039
## fu1 3.55479530 2.723425214 4.63995469
## fu2 3.69354707 2.817905687 4.84128693
## fu3 2.93201254 2.201195141 3.90546816
## fu4 2.44760423 1.804262241 3.32034133
## fu5 2.25601958 1.636868208 3.10936723
## fu6 1.79732528 1.264071466 2.55553445
## fu7 1.28840069 0.857735550 1.93530086
## fu8 1.43915154 0.953447823 2.17228159
## fu9 0.79589580 0.461149435 1.37363308
## agegrp45-59 1.32670920 1.103059120 1.59570531
## agegrp60-74 1.86113111 1.557443264 2.22403543
## agegrp75+ 3.39953913 2.770276830 4.17173697
## year8594Diagnosed 85-94 0.74143513 0.623488755 0.88169360
## year8594Diagnosed 75-84:sexFemale 0.60313385 0.507452579 0.71685602
## year8594Diagnosed 85-94:sexFemale 0.56919219 0.470554120 0.68850689
(l)
If we fit stratified models we get slightly different estimates (\(0.6165815\) and \(0.5549737\)) since the models stratified by calendar period imply that all estimates are modified by calendar period. That is, we are actually estimating the following model:
summary( poisson7l.early <- glm( death_cancer ~ fu + agegrp + sex + offset( log(pt) ),
family = poisson, data = melanoma.spl,
subset = year8594 == "Diagnosed 75-84" ) )
##
## Call:
## glm(formula = death_cancer ~ fu + agegrp + sex + offset(log(pt)),
## family = poisson, data = melanoma.spl, subset = year8594 ==
## "Diagnosed 75-84")
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.35024 0.19749 -22.028 < 2e-16 ***
## fu1 1.29711 0.19807 6.549 5.80e-11 ***
## fu2 1.43389 0.19734 7.266 3.70e-13 ***
## fu3 0.87511 0.21622 4.047 5.18e-05 ***
## fu4 0.72162 0.22538 3.202 0.00137 **
## fu5 0.74575 0.22738 3.280 0.00104 **
## fu6 0.50579 0.24297 2.082 0.03737 *
## fu7 0.13806 0.27198 0.508 0.61172
## fu8 0.41333 0.25503 1.621 0.10508
## fu9 -0.26674 0.31931 -0.835 0.40352
## agegrp45-59 0.36623 0.12112 3.024 0.00250 **
## agegrp60-74 0.59417 0.11933 4.979 6.39e-07 ***
## agegrp75+ 1.02300 0.15322 6.677 2.45e-11 ***
## sexFemale -0.48356 0.08839 -5.471 4.48e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 4649.9 on 16933 degrees of freedom
## Residual deviance: 4430.6 on 16920 degrees of freedom
## AIC: 5496.6
##
## Number of Fisher Scoring iterations: 6
## exp(beta) 2.5 % 97.5 %
## (Intercept) 0.01290368 0.008762127 0.01900279
## fu1 3.65871653 2.481590140 5.39420529
## fu2 4.19499842 2.849396206 6.17604941
## fu3 2.39915019 1.570414849 3.66522365
## fu4 2.05775938 1.322964109 3.20067161
## fu5 2.10802429 1.349983775 3.29171839
## fu6 1.65830219 1.030014324 2.66983293
## fu7 1.14804722 0.673675578 1.95644975
## fu8 1.51184459 0.917120868 2.49222775
## fu9 0.76587332 0.409596432 1.43204847
## agegrp45-59 1.44229093 1.137518383 1.82872044
## agegrp60-74 1.81153335 1.433732925 2.28888730
## agegrp75+ 2.78152101 2.059958553 3.75583242
## sexFemale 0.61658148 0.518507340 0.73320605
summary( poisson7l.late <- glm( death_cancer ~ fu + agegrp + sex + offset( log(pt) ),
family = poisson, data = melanoma.spl,
subset = year8594 == "Diagnosed 85-94" ) )
##
## Call:
## glm(formula = death_cancer ~ fu + agegrp + sex + offset(log(pt)),
## family = poisson, data = melanoma.spl, subset = year8594 ==
## "Diagnosed 85-94")
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.716255 0.198840 -23.719 < 2e-16 ***
## fu1 1.242842 0.186946 6.648 2.97e-11 ***
## fu2 1.158910 0.194930 5.945 2.76e-09 ***
## fu3 1.269035 0.198560 6.391 1.65e-10 ***
## fu4 1.091821 0.214814 5.083 3.72e-07 ***
## fu5 0.907839 0.238726 3.803 0.000143 ***
## fu6 0.712226 0.272810 2.611 0.009036 **
## fu7 0.467548 0.331976 1.408 0.159019
## fu8 0.003177 0.476361 0.007 0.994678
## fu9 -0.219501 0.725878 -0.302 0.762352
## agegrp45-59 0.170696 0.149540 1.141 0.253672
## agegrp60-74 0.657696 0.140647 4.676 2.92e-06 ***
## agegrp75+ 1.384288 0.148765 9.305 < 2e-16 ***
## sexFemale -0.588835 0.097576 -6.035 1.59e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 3997.8 on 17374 degrees of freedom
## Residual deviance: 3780.7 on 17361 degrees of freedom
## AIC: 4690.7
##
## Number of Fisher Scoring iterations: 7
## exp(beta) 2.5 % 97.5 %
## (Intercept) 0.008948632 0.006060435 0.01321324
## fu1 3.465448196 2.402316723 4.99906240
## fu2 3.186458106 2.174619253 4.66910023
## fu3 3.557416952 2.410572145 5.24988037
## fu4 2.979696169 1.955790301 4.53964275
## fu5 2.478958882 1.552620335 3.95797801
## fu6 2.038523435 1.194262121 3.47961953
## fu7 1.596076214 0.832673832 3.05937233
## fu8 1.003182304 0.394367397 2.55187103
## fu9 0.802919497 0.193554438 3.33074109
## agegrp45-59 1.186130380 0.884797127 1.59008798
## agegrp60-74 1.930340028 1.465259625 2.54303917
## agegrp75+ 3.991981639 2.982359282 5.34339290
## sexFemale 0.554973666 0.458369470 0.67193779
## exp(beta) 2.5 % 97.5 %
## (Intercept) 0.01269168 0.009585413 0.01680457
## fu1 3.55468470 2.723340923 4.63980959
## fu2 3.69349752 2.817870250 4.84121792
## fu3 2.93219656 2.201336558 3.90570748
## fu4 2.44775331 1.804376456 3.32053559
## fu5 2.25623262 1.637030417 3.10964634
## fu6 1.79745329 1.264170006 2.55569926
## fu7 1.28866663 0.857919555 1.93568462
## fu8 1.43945962 0.953660962 2.17272602
## fu9 0.79615726 0.461304916 1.37407245
## year8594Diagnosed 85-94 0.72241051 0.634523266 0.82247095
## sexFemale 0.58754651 0.516807578 0.66796796
## agegrp45-59 1.32779475 1.104004888 1.59694845
## agegrp60-74 1.86237635 1.558526802 2.22546423
## agegrp75+ 3.40028687 2.770846371 4.17271449
## exp(beta) 2.5 % 97.5 %
## (Intercept) 0.01253791 0.009418543 0.01669039
## fu1 3.55479530 2.723425214 4.63995469
## fu2 3.69354707 2.817905687 4.84128693
## fu3 2.93201254 2.201195141 3.90546816
## fu4 2.44760423 1.804262241 3.32034133
## fu5 2.25601958 1.636868208 3.10936723
## fu6 1.79732528 1.264071466 2.55553445
## fu7 1.28840069 0.857735550 1.93530086
## fu8 1.43915154 0.953447823 2.17228159
## fu9 0.79589580 0.461149435 1.37363308
## agegrp45-59 1.32670920 1.103059120 1.59570531
## agegrp60-74 1.86113111 1.557443264 2.22403543
## agegrp75+ 3.39953913 2.770276830 4.17173697
## year8594Diagnosed 85-94 0.74143513 0.623488755 0.88169360
## sexFemale 0.60313385 0.507452579 0.71685602
## year8594Diagnosed 85-94:sexFemale 0.94372451 0.730577239 1.21905789
# Poisson-regression with effects specific for diagnose period
summary(poisson7l2 <- glm( death_cancer ~ fu + fu:year8594 + agegrp + agegrp:year8594
+ sex*year8594 + offset( log(pt) ),
family=poisson, data=melanoma.spl ))
##
## Call:
## glm(formula = death_cancer ~ fu + fu:year8594 + agegrp + agegrp:year8594 +
## sex * year8594 + offset(log(pt)), family = poisson, data = melanoma.spl)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.35024 0.19751 -22.026 < 2e-16 ***
## fu1 1.29711 0.19808 6.548 5.82e-11 ***
## fu2 1.43389 0.19736 7.266 3.72e-13 ***
## fu3 0.87511 0.21623 4.047 5.18e-05 ***
## fu4 0.72162 0.22540 3.202 0.00137 **
## fu5 0.74575 0.22740 3.280 0.00104 **
## fu6 0.50579 0.24299 2.082 0.03738 *
## fu7 0.13806 0.27202 0.508 0.61177
## fu8 0.41333 0.25504 1.621 0.10510
## fu9 -0.26674 0.31945 -0.835 0.40372
## agegrp45-59 0.36623 0.12113 3.024 0.00250 **
## agegrp60-74 0.59417 0.11934 4.979 6.40e-07 ***
## agegrp75+ 1.02300 0.15323 6.676 2.45e-11 ***
## sexFemale -0.48356 0.08839 -5.471 4.48e-08 ***
## year8594Diagnosed 85-94 -0.36601 0.28026 -1.306 0.19156
## fu1:year8594Diagnosed 85-94 -0.05427 0.27237 -0.199 0.84207
## fu2:year8594Diagnosed 85-94 -0.27498 0.27739 -0.991 0.32153
## fu3:year8594Diagnosed 85-94 0.39392 0.29357 1.342 0.17965
## fu4:year8594Diagnosed 85-94 0.37020 0.31137 1.189 0.23445
## fu5:year8594Diagnosed 85-94 0.16209 0.32970 0.492 0.62298
## fu6:year8594Diagnosed 85-94 0.20643 0.36533 0.565 0.57204
## fu7:year8594Diagnosed 85-94 0.32949 0.42919 0.768 0.44267
## fu8:year8594Diagnosed 85-94 -0.41015 0.54034 -0.759 0.44781
## fu9:year8594Diagnosed 85-94 0.04724 0.79306 0.060 0.95250
## year8594Diagnosed 85-94:agegrp45-59 -0.19554 0.19244 -1.016 0.30959
## year8594Diagnosed 85-94:agegrp60-74 0.06352 0.18446 0.344 0.73056
## year8594Diagnosed 85-94:agegrp75+ 0.36129 0.21357 1.692 0.09070 .
## year8594Diagnosed 85-94:sexFemale -0.10527 0.13166 -0.800 0.42397
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 8651.5 on 34308 degrees of freedom
## Residual deviance: 8211.4 on 34281 degrees of freedom
## AIC: 10187
##
## Number of Fisher Scoring iterations: 7
## exp(beta) 2.5 % 97.5 %
## (Intercept) 0.01290368 0.008761852 0.01900338
## fu1 3.65871657 2.481529705 5.39433679
## fu2 4.19499847 2.849324333 6.17620534
## fu3 2.39915022 1.570376752 3.66531265
## fu4 2.05775940 1.322929450 3.20075553
## fu5 2.10802431 1.349942800 3.29181835
## fu6 1.65830221 1.029983069 2.66991399
## fu7 1.14804717 0.673627936 1.95658794
## fu8 1.51184461 0.917092926 2.49230372
## fu9 0.76587284 0.409489206 1.43242168
## agegrp45-59 1.44229095 1.137497545 1.82875400
## agegrp60-74 1.81153338 1.433707489 2.28892799
## agegrp75+ 2.78152106 2.059922700 3.75589792
## sexFemale 0.61658148 0.518502522 0.73321286
## year8594Diagnosed 85-94 0.69349480 0.400391639 1.20116153
## fu1:year8594Diagnosed 85-94 0.94717591 0.555376681 1.61537607
## fu2:year8594Diagnosed 85-94 0.75958505 0.441020116 1.30826106
## fu3:year8594Diagnosed 85-94 1.48278208 0.834053707 2.63609247
## fu4:year8594Diagnosed 85-94 1.44802943 0.786579357 2.66570589
## fu5:year8594Diagnosed 85-94 1.17596314 0.616248847 2.24404364
## fu6:year8594Diagnosed 85-94 1.22928344 0.600729103 2.51550617
## fu7:year8594Diagnosed 85-94 1.39025317 0.599473921 3.22416674
## fu8:year8594Diagnosed 85-94 0.66354855 0.230109662 1.91342111
## fu9:year8594Diagnosed 85-94 1.04837181 0.221544163 4.96101286
## year8594Diagnosed 85-94:agegrp45-59 0.82239328 0.563992308 1.19918426
## year8594Diagnosed 85-94:agegrp60-74 1.06558347 0.742295370 1.52967158
## year8594Diagnosed 85-94:agegrp75+ 1.43517937 0.944316163 2.18119726
## year8594Diagnosed 85-94:sexFemale 0.90008164 0.695365474 1.16506642
(m)
This is more advanced code. After splitting finely, we fit a Poisson regression with natural splines for the mid-points and then plot the predicted rates.
## Split follow up by month
library(splines)
library(tinyplot)
time.cut <- seq(0,10,by=1/12)
nrow(biostat3::melanoma)
## [1] 7775
melanoma.spl2 <- survSplit(Surv(surv_mm/12,status=="Dead: cancer")~.,
data=biostat3::melanoma,
cut=time.cut,
subset=stage=="Localised") |>
transform(mid=(tstop+tstart)/2, risk_time=tstop-tstart)
nrow(melanoma.spl2)
## [1] 391911
poisson7m <- glm(event ~ ns(mid,df=6) + agegrp + year8594 +
offset(log(risk_time)),
family=poisson,
data=melanoma.spl2)
df <- data.frame(agegrp="0-44", year8594="Diagnosed 75-84",
mid=time.cut[-1], risk_time=1)
pred <- predict(poisson7m, newdata=df, se.fit=TRUE)
qq <- qnorm(0.975)
df <- with(pred, transform(df,
fit=exp(fit), # parallel (not sequential:)
conf.low=exp(fit-qq*se.fit),
conf.high=exp(fit+qq*se.fit)))
## plot the rate at the baseline values
with(df, plt(fit~mid, ymin=conf.low, ymax=conf.high, type="ribbon",
ylab="Rate", xlab="Time since diagnosis (years)",
ylim=c(0,0.05)))
(n)
This is more advanced code. We use the rstpm2::predictnl
function to calculate the variance for some estimator using the delta
method. We show examples using both a rate ratio and a rate
difference:
## using melanoma.spl2 and df from previous chunk
poisson7n <- glm(event ~ ns(mid,df=4) + agegrp + year8594 +
ifelse(year8594=="Diagnosed 85-94",1,0):ns(mid,df=3) +
offset(log(risk_time)),
family=poisson,
data=melanoma.spl2)
library(rstpm2)
library(tinyplot)
## get log(RR) confidence interval using predictnl (delta method)
df <- data.frame(agegrp="0-44", year8594="Diagnosed 75-84",
mid=time.cut[-1], risk_time=1)
predictnl(poisson7n, function(object)
predict(object, newdata=transform(df, year8594="Diagnosed 85-94"), type="link") -
predict(object, newdata=df, type="link")) |>
cbind(df) |>
as.data.frame() |>
transform(fit = exp(fit),
conf.low=exp(fit-1.96*se.fit),
conf.high=exp(fit+1.96*se.fit)) |>
with(plt(fit~mid, ymin=conf.low, ymax=conf.high, type="ribbon",
xlab="Time since diagnosis (years)",
ylab="Rate ratio"))
predictnl(poisson7n,
function(object)
predict(object, newdata=transform(df, year8594="Diagnosed 85-94"),
type="response") -
predict(object, newdata=df, type="response")) |>
cbind(df) |>
transform(conf.low=fit-1.96*se.fit,
conf.high=fit+1.96*se.fit) |>
with(plt(fit~mid, ymin=conf.low, ymax=conf.high, type="ribbon",
xlab="Time since diagnosis (years)",
ylab="Rate difference"))
(o)
This is more advanced code. First, we use the
rstpm2::markov_msm
function for Markov multi-state models
to estimate survival for the Poisson regression model.
Then we use the rstpm2::predictnl
function to calculate
the variance for some estimator using the delta method. The estimator
for survival uses ordinary differential equations – which is outside the
scope of Biostatistics III:).
## Calculate survival from a smooth Poisson regression model
if (requireNamespace("deSolve")) {
twoState <- function(object, ...) {
markov_msm(list(object),matrix(c(NA,1,NA,NA),2,byrow=TRUE), ...) |>
as.data.frame() |>
subset(state==1)
}
df3 <- expand.grid(agegrp=levels(biostat3::melanoma$agegrp),
year8594=levels(biostat3::melanoma$year8594)) |>
transform(risk_time=1)
library(tinyplot)
twoState(poisson7n, t=c(0,time.cut), newdata = df3, tmvar = "mid") |>
with(plt(P~time|year8594, ymin=P.lower,ymax=P.upper,
type="ribbon",facet=agegrp,
xlab="Time since diagnosis (years)",
ylab="Survival"))
} else cat("To run this example, please install the deSolve package\n")