Biostatistics III in R

Exercise 28. Flexible parametric survival models in R

library(biostat3)  
library(rstpm2)  # for the flexible parametric model
library(ggplot2)
library(tinyplot)

## Extract subset and create 0/1 outcome variables
melanoma0 <- biostat3::melanoma |> subset(stage=="Localised") |>
             transform(event = ifelse(status=="Dead: cancer" & surv_mm<120, 1, 0),
                       time = pmin(120, surv_mm)/12,
                       agegrp1 = (agegrp=="0-44")+0,  # used by time-dependent effect
                       agegrp2 = (agegrp=="45-59")+0, # used by time-dependent effect
                       agegrp3 = (agegrp=="60-74")+0, # used by time-dependent effect
                       agegrp4 = (agegrp=="75+")+0)   # used by time-dependent effect
agegrps <- levels(melanoma0$agegrp)

(a)

The stpm2 output can be seen below.

## (a) Flexible parametric model with df=4
fpma <- stpm2(Surv(time,event) ~ year8594, data=melanoma0, df=4)
summary(fpma)

## Maximum likelihood estimation
## 
## Call:
## stpm2(formula = Surv(time, event) ~ year8594, data = melanoma0, 
##     df = 4)
## 
## Coefficients:
##                         Estimate Std. Error  z value     Pr(z)    
## (Intercept)             -8.68011    0.58362 -14.8729 < 2.2e-16 ***
## year8594Diagnosed 85-94 -0.24334    0.06584  -3.6959 0.0002191 ***
## nsx(log(time), df = 4)1  6.58737    0.57687  11.4192 < 2.2e-16 ***
## nsx(log(time), df = 4)2  5.65891    0.38069  14.8647 < 2.2e-16 ***
## nsx(log(time), df = 4)3 11.41694    1.14340   9.9851 < 2.2e-16 ***
## nsx(log(time), df = 4)4  4.93069    0.24526  20.1043 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## -2 log L: 8440.488

eform(fpma)["year8594Diagnosed 85-94",]

## Warning in .local(object, parm, level, ...): non-monotonic spline fit to
## profile (nsx(log(time), df = 4)2): reverting from spline to linear
## approximation

## exp(beta)     2.5 %    97.5 % 
## 0.7840078 0.7130851 0.8594660

The hazard ratio, 95% confidence interval and statistical significance are very similar to the Cox model.

cox <- coxph(Surv(time, event) ~ year8594,
             data=melanoma0) # year8594 is a categorical variable
summary(cox)

## Call:
## coxph(formula = Surv(time, event) ~ year8594, data = melanoma0)
## 
##   n= 5318, number of events= 960 
## 
##                             coef exp(coef) se(coef)      z Pr(>|z|)    
## year8594Diagnosed 85-94 -0.25297   0.77649  0.06579 -3.845 0.000121 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                         exp(coef) exp(-coef) lower .95 upper .95
## year8594Diagnosed 85-94    0.7765      1.288    0.6825    0.8834
## 
## Concordance= 0.533  (se = 0.008 )
## Likelihood ratio test= 14.83  on 1 df,   p=1e-04
## Wald test            = 14.78  on 1 df,   p=1e-04
## Score (logrank) test = 14.86  on 1 df,   p=1e-04

(b)

The predicted survival and hazard functions are shown below.

## (b) Prediction and plots of survival and hazard by calendar period
years <- levels(melanoma0$year8594)

s <- predict(fpma,newdata=data.frame(year8594=years),grid=TRUE,full=TRUE,se.fit=TRUE,
             type="surv")
head(s)

##          year8594      time  Estimate     lower     upper
## 1 Diagnosed 75-84 0.1578874 0.9997036 0.9991978 0.9998905
## 2 Diagnosed 85-94 0.1578874 0.9997676 0.9993710 0.9999142
## 3 Diagnosed 75-84 0.1907748 0.9995356 0.9988822 0.9998071
## 4 Diagnosed 85-94 0.1907748 0.9996359 0.9991236 0.9998487
## 5 Diagnosed 75-84 0.2236622 0.9993239 0.9985215 0.9996909
## 6 Diagnosed 85-94 0.2236622 0.9994699 0.9988409 0.9997576

ggplot(s, aes(x=time,y=Estimate,fill=year8594,ymin=lower,ymax=upper)) +
    xlab("Time since diagnosis (years)") +
    ylab("Survival") +
    geom_ribbon(alpha=0.6) +
    geom_line()

predict(fpma,newdata=data.frame(year8594=years),grid=TRUE,full=TRUE,se.fit=TRUE,
        type="hazard") |>
    with(plt(Estimate~time|year8594, ymin=lower,ymax=upper,
             xlab="Time since diagnosis (years)",
             ylab="Survival",
             type="ribbon"))

(c)

There is a constant difference as the predictions are from a proportional hazards model and a multiplicative effect becomes additive on the log scale.

## (c) hazards on log scale, adding log="y"
predict(fpma,newdata=data.frame(year8594=years),grid=TRUE,full=TRUE,se.fit=TRUE,
        type="hazard") |>
    with(plt(Estimate~time|year8594, ymin=lower,ymax=upper,
             xlab="Time since diagnosis (years)",
             log="y",
             ylab="Survival",
             type="ribbon"))

(d)

The log hazard ratios (and hence the hazard ratios) from 2 df and up are similar and for 3 df they are very similar. The main difference is for 1 df, which is equivalent to a Weibull model. The Weibull model enforces a monotonic hazard function and as the hazard function in the melanoma data has a turning point it is clearly inappropriate.
The lowest AIC is for the model with 5 df and for the BIC it is the model with 2 df. The penalty term in the AIC is twice the number of parameters (\(2 \times k\)) whereas in the BIC it is \(\log(D) \times k\) where \(D\) is the number of events. Since \(\log(D) > k\) the BIC penalizes extra parameters much more strongly than AIC. Since we have a large data set and there are no disadvantages to including extra parameters we would use 5 df for the baseline hazard.

## utility function to row bind from a list
lapply(1:6, function(i) {
    fitaic <- stpm2(Surv(time, event) ~ year8594, data=melanoma0, df=i)
    data.frame(
        i,
        AIC=AIC(fitaic),
        BIC=BIC(fitaic),
        beta=as.numeric(coef(fitaic)[2]),
        se=coef(summary(fitaic))[2,2])
}) |> do.call(what=rbind)

##   i      AIC      BIC       beta         se
## 1 1 8676.812 8696.548 -0.1157531 0.06575733
## 2 2 8452.828 8479.144 -0.2396001 0.06584093
## 3 3 8452.433 8485.327 -0.2438929 0.06581083
## 4 4 8452.488 8491.961 -0.2433363 0.06584011
## 5 5 8447.417 8493.469 -0.2455450 0.06580441
## 6 6 8447.710 8500.341 -0.2460036 0.06580312

(e)

## Baseline survival
fitaic0 <- stpm2(Surv(time, event) ~ year8594, data=melanoma0, df=6)
plot(fitaic0,newdata=data.frame(year8594=years[1]), lty=6, ci=FALSE,
     xlab="Time since diagnosis (years)")
for (i in 1:5 ) {
  fitaic <- stpm2(Surv(time, event) ~ year8594, data=melanoma0, df=i)
  lines(fitaic,newdata=data.frame(year8594=years[1]), lty=i)
}
legend("topright", legend=paste0("df=",1:6), lty=1:6)

The code for hazards is very similar:

## Baseline hazard
fitaic1 <- stpm2(Surv(time, event) ~ year8594, data=melanoma0, df=6)
plot(fitaic1,newdata=data.frame(year8594=years[1]), lty=6, type="haz",
     ci=FALSE, xlab="Time since diagnosis (years)", ylab="Hazard")
for (i in 1:5 ) {
  fitaic <- stpm2(Surv(time, event) ~ year8594, data=melanoma0, df=i)
  lines(fitaic,type="haz",newdata=data.frame(year8594=years[1]), lty=i)
}
legend("topright", legend=paste0("df=",1:6), lty=1:6)

With the exception of 1 df (the Weibull model), the survival and hazard functions show similar shapes, so as long we have enough knots our conclusions would be very similar.

(f)

fpmf <- stpm2(Surv(time, event) ~ sex + year8594 + agegrp,
              data=melanoma0, df=4)
summary(fpmf)

## Maximum likelihood estimation
## 
## Call:
## stpm2(formula = Surv(time, event) ~ sex + year8594 + agegrp, 
##     data = melanoma0, df = 4)
## 
## Coefficients:
##                          Estimate Std. Error  z value     Pr(z)    
## (Intercept)             -8.871693   0.588596 -15.0726 < 2.2e-16 ***
## sexFemale               -0.531564   0.065453  -8.1213 4.613e-16 ***
## year8594Diagnosed 85-94 -0.324071   0.066235  -4.8928 9.942e-07 ***
## agegrp45-59              0.283536   0.094174   3.0108  0.002606 ** 
## agegrp60-74              0.621701   0.090878   6.8411 7.860e-12 ***
## agegrp75+                1.223829   0.104452  11.7167 < 2.2e-16 ***
## nsx(log(time), df = 4)1  6.611562   0.577354  11.4515 < 2.2e-16 ***
## nsx(log(time), df = 4)2  5.715009   0.381092  14.9964 < 2.2e-16 ***
## nsx(log(time), df = 4)3 11.461392   1.145638  10.0044 < 2.2e-16 ***
## nsx(log(time), df = 4)4  5.019392   0.245502  20.4454 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## -2 log L: 8241.324

eform(fpmf)[2:6,]

## Warning in .local(object, parm, level, ...): non-monotonic spline fit to
## profile (nsx(log(time), df = 4)2): reverting from spline to linear
## approximation

##                         exp(beta)     2.5 %    97.5 %
## sexFemale               0.5876853 0.5342863 0.6445097
## year8594Diagnosed 85-94 0.7231987 0.6577743 0.7927980
## agegrp45-59             1.3278161 1.1745477 1.4938284
## agegrp60-74             1.8620932 1.6645414 2.0746573
## agegrp75+               3.4001815 2.9272327 3.9212110

## To test the overall effect of age with LR test
fpmf2 <- stpm2(Surv(time,event) ~ sex + year8594, data=melanoma0, df=4)
anova(fpmf, fpmf2)

## Likelihood Ratio Tests
## Model 1: fpmf, [negll]: (Intercept)+sexFemale+year8594Diagnosed 85-94+
##           agegrp45-59+agegrp60-74+agegrp75++nsx(log(time), df = 4)1+
##           nsx(log(time), df = 4)2+nsx(log(time), df = 4)3+nsx(log(time), df =
##           4)4
## Model 2: fpmf2, [negll]: (Intercept)+sexFemale+year8594Diagnosed 85-94+
##           nsx(log(time), df = 4)1+nsx(log(time), df = 4)2+nsx(log(time), df =
##           4)3+nsx(log(time), df = 4)4
##   Tot Df Deviance  Chisq Df Pr(>Chisq)    
## 1     10   8241.3                         
## 2      7   8385.9 144.56  3  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

After modelling for sex, calendar period age group and time with five degrees of freedom, the adjusted hazard ratios for: females compared with males was 0.59 (95% CI: 0.54, 0.64); diagnoses during the period 1985–1994 compared with those diagnosed 1974–1984 was 0.72 (95% CI: 0.66, 0.79); and age groups 45–59, 60–74 and 75+ compared with ages 0–44 years were 1.33 (95% CI: 1.17, 1.49), 1.86 (95% CI: 1.66. 2.07) and 3.40 (95% CI: 2.93, 3.92), respectively.

Undertaking a likelihood ratio test, there was strong evidence (p<1e-16) that age group contributed significantly to the model fit.

(g)

summary(fit <- coxph(Surv(time, event) ~ sex + year8594 + agegrp,
                     data=melanoma0))

## Call:
## coxph(formula = Surv(time, event) ~ sex + year8594 + agegrp, 
##     data = melanoma0)
## 
##   n= 5318, number of events= 960 
## 
##                             coef exp(coef) se(coef)      z Pr(>|z|)    
## sexFemale               -0.53061   0.58825  0.06545 -8.107 5.19e-16 ***
## year8594Diagnosed 85-94 -0.33339   0.71649  0.06618 -5.037 4.72e-07 ***
## agegrp45-59              0.28283   1.32688  0.09417  3.003  0.00267 ** 
## agegrp60-74              0.62006   1.85904  0.09088  6.823 8.90e-12 ***
## agegrp75+                1.21801   3.38045  0.10443 11.663  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                         exp(coef) exp(-coef) lower .95 upper .95
## sexFemale                  0.5882     1.7000    0.5174    0.6688
## year8594Diagnosed 85-94    0.7165     1.3957    0.6293    0.8157
## agegrp45-59                1.3269     0.7536    1.1032    1.5959
## agegrp60-74                1.8590     0.5379    1.5557    2.2215
## agegrp75+                  3.3804     0.2958    2.7547    4.1483
## 
## Concordance= 0.646  (se = 0.009 )
## Likelihood ratio test= 212.7  on 5 df,   p=<2e-16
## Wald test            = 217.9  on 5 df,   p=<2e-16
## Score (logrank) test = 226.8  on 5 df,   p=<2e-16

anova(fit)

## Analysis of Deviance Table
##  Cox model: response is Surv(time, event)
## Terms added sequentially (first to last)
## 
##           loglik   Chisq Df Pr(>|Chi|)    
## NULL     -7899.0                          
## sex      -7873.4  51.235  1  8.193e-13 ***
## year8594 -7864.4  18.089  1  2.109e-05 ***
## agegrp   -7792.7 143.365  3  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The estimates are so similar because very similar models are being fitted with exactly the same covariates. The two models differ only in the manner in which they account for the baseline hazard. In the Cox model it is assumed arbitrary and not directly estimated. In the flexible parametric model the baseline hazard is modelled using splines. The 5 df spline allows sufficient flexibility to make the model estimates virtually identical.

(h)

## (h) Change to time-varying effect of agegrp2:4
## NB: including main effect of agegrp
fpmh <- stpm2(Surv(time,event) ~ sex + year8594 + agegrp2 + agegrp3 + agegrp4,
              data=melanoma0, tvc=list(agegrp2 = 2, agegrp3 = 2, agegrp4 = 2),
              df=4)
summary(fpmh)

## Maximum likelihood estimation
## 
## Call:
## stpm2(formula = Surv(time, event) ~ sex + year8594 + agegrp2 + 
##     agegrp3 + agegrp4, data = melanoma0, tvc = list(agegrp2 = 2, 
##     agegrp3 = 2, agegrp4 = 2), df = 4)
## 
## Coefficients:
##                                               Estimate Std. Error z value
## (Intercept)                                 -11.430385   1.352688 -8.4501
## sexFemale                                    -0.525715   0.065423 -8.0356
## year8594Diagnosed 85-94                      -0.328279   0.066318 -4.9501
## agegrp2                                       1.895592   1.452011  1.3055
## agegrp3                                       1.672483   1.400490  1.1942
## agegrp4                                       5.203061   1.357570  3.8326
## nsx(log(time), df = 4)1                       8.987875   1.342336  6.6957
## nsx(log(time), df = 4)2                       7.580891   0.984030  7.7039
## nsx(log(time), df = 4)3                      15.766479   2.409215  6.5442
## nsx(log(time), df = 4)4                       6.513171   0.703624  9.2566
## as.numeric(agegrp2):nsx(log(time), df = 2)1  -2.659503   2.584875 -1.0289
## as.numeric(agegrp2):nsx(log(time), df = 2)2  -0.805394   0.592422 -1.3595
## nsx(log(time), df = 2)1:as.numeric(agegrp3)  -1.510981   2.494029 -0.6058
## nsx(log(time), df = 2)2:as.numeric(agegrp3)  -0.718230   0.570820 -1.2582
## nsx(log(time), df = 2)1:as.numeric(agegrp4)  -6.730551   2.427744 -2.7723
## nsx(log(time), df = 2)2:as.numeric(agegrp4)  -2.029783   0.549990 -3.6906
##                                                 Pr(z)    
## (Intercept)                                 < 2.2e-16 ***
## sexFemale                                   9.312e-16 ***
## year8594Diagnosed 85-94                     7.418e-07 ***
## agegrp2                                     0.1917245    
## agegrp3                                     0.2323947    
## agegrp4                                     0.0001268 ***
## nsx(log(time), df = 4)1                     2.146e-11 ***
## nsx(log(time), df = 4)2                     1.320e-14 ***
## nsx(log(time), df = 4)3                     5.980e-11 ***
## nsx(log(time), df = 4)4                     < 2.2e-16 ***
## as.numeric(agegrp2):nsx(log(time), df = 2)1 0.3035404    
## as.numeric(agegrp2):nsx(log(time), df = 2)2 0.1739901    
## nsx(log(time), df = 2)1:as.numeric(agegrp3) 0.5446214    
## nsx(log(time), df = 2)2:as.numeric(agegrp3) 0.2083038    
## nsx(log(time), df = 2)1:as.numeric(agegrp4) 0.0055653 ** 
## nsx(log(time), df = 2)2:as.numeric(agegrp4) 0.0002237 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## -2 log L: 8212.053

## LR test comparing fpmh (non-PH for agegrp2:4) with fpmf(PH for agegrp2:4)
anova(fpmh, fpmf)

## Likelihood Ratio Tests
## Model 1: fpmh, [negll]: (Intercept)+sexFemale+year8594Diagnosed 85-94+
##           agegrp2+agegrp3+agegrp4+nsx(log(time), df = 4)1+nsx(log(time), df
##           = 4)2+nsx(log(time), df = 4)3+nsx(log(time), df = 4)4+
##           as.numeric(agegrp2):nsx(log(time), df = 2)1+
##           as.numeric(agegrp2):nsx(log(time), df = 2)2+nsx(log(time), df =
##           2)1:as.numeric(agegrp3)+nsx(log(time), df = 2)2:as.numeric(agegrp3)+
##           nsx(log(time), df = 2)1:as.numeric(agegrp4)+nsx(log(time), df =
##           2)2:as.numeric(agegrp4)
## Model 2: fpmf, [negll]: (Intercept)+sexFemale+year8594Diagnosed 85-94+
##           agegrp45-59+agegrp60-74+agegrp75++nsx(log(time), df = 4)1+
##           nsx(log(time), df = 4)2+nsx(log(time), df = 4)3+nsx(log(time), df =
##           4)4
##   Tot Df Deviance  Chisq Df Pr(>Chisq)    
## 1     16   8212.1                         
## 2     10   8241.3 29.271  6  5.406e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

There is strong evidence of a non-proportional effect of age.

We could also investigate the non-proportional effect of age with penalized models, where sp is the optimal smoothing parameters estimated from models without sp argument:

## I investigated the non-proportional effect of age with penalized models
## here, sp is the optimal smoothing parameters estimated from models without sp argument
pfit0 <- pstpm2(Surv(time,event) ~ sex + year8594 + agegrp,
                data=melanoma0, sp=0.1359685)

## The time-dependent effects including linear forms of age groups
pfit1 <- pstpm2(Surv(time,event) ~ sex + year8594 + agegrp2 + agegrp3 + agegrp4,
                tvc=list(agegrp2=7,agegrp3=7,agegrp4=7),
                data=melanoma0, sp=c( 0.1429949, 1.6133966, 1.3183117, 1.9958815))
anova(pfit1, pfit0)## the results also suggest there is strong evidence

## Likelihood Ratio Tests
## Model 1: pfit1, [negll]: (Intercept)+sexFemale+year8594Diagnosed 85-94+
##           s(log(time)).1+s(log(time)).2+s(log(time)).3+s(log(time)).4+
##           s(log(time)).5+s(log(time)).6+s(log(time)).7+s(log(time)).8+
##           s(log(time)).9+s(log(time)):agegrp2.1+s(log(time)):agegrp2.2+
##           s(log(time)):agegrp2.3+s(log(time)):agegrp2.4+
##           s(log(time)):agegrp2.5+s(log(time)):agegrp2.6+
##           s(log(time)):agegrp2.7+s(log(time)):agegrp3.1+
##           s(log(time)):agegrp3.2+s(log(time)):agegrp3.3+
##           s(log(time)):agegrp3.4+s(log(time)):agegrp3.5+
##           s(log(time)):agegrp3.6+s(log(time)):agegrp3.7+
##           s(log(time)):agegrp4.1+s(log(time)):agegrp4.2+
##           s(log(time)):agegrp4.3+s(log(time)):agegrp4.4+
##           s(log(time)):agegrp4.5+s(log(time)):agegrp4.6+
##           s(log(time)):agegrp4.7
## Model 2: pfit0, [negll]: (Intercept)+sexFemale+year8594Diagnosed 85-94+
##           agegrp45-59+agegrp60-74+agegrp75++s(log(time)).1+s(log(time)).2+
##           s(log(time)).3+s(log(time)).4+s(log(time)).5+s(log(time)).6+
##           s(log(time)).7+s(log(time)).8+s(log(time)).9
##   Tot Df Deviance  Chisq     Df Pr(>Chisq)   
## 1 21.555   8205.8                            
## 2 14.124   8230.9 25.098 7.4306   0.001003 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(i)

The baseline hazard is shown below. This baseline is for the youngest age group who are male and diagnosed in 1975–1984, i.e, when all the covariates are equal to zero.

## Plot of baseline hazard with fpmh
newdata1 <- data.frame(sex="Male",year8594="Diagnosed 75-84",
                       agegrp2=0, agegrp3=0, agegrp4=0)
plot(fpmh, newdata=newdata1, xlab="Time since diagnosis (years)", 
     type="haz")

(j)

plot(fpmh, newdata=newdata1, xlab="Time since diagnosis (years)",
     type="hr",var="agegrp2", ci=FALSE, ylim=c(0,6))
lines(fpmh, newdata=newdata1, type="hr", var="agegrp3", lty=2)
lines(fpmh, newdata=newdata1, type="hr", var="agegrp4", lty=3)
legend("topright", legend=paste0(agegrps[-1]," vs 0-44"), lty=1:3)

plot(fpmh, newdata=newdata1,
     type="hr", log="y",
     exposed=function(data) transform(data,agegrp4=agegrp4+1), # same as var="agegrp4"
     xlab="Time since diagnosis (years)")

    The hazard ratios decrease as a function of follow-up time. The
    hazard ratio is so high during the early years of follow-up
    because the hazard in the reference group is close to
    zero. The hazard ratio for the oldest age
    group with 95% confidence intervals is also shown.

(k)

plot(fpmh,newdata=newdata1,
     type="hdiff", var="agegrp4",
     xlab="Time since diagnosis (years)")

The hazard difference is small early on, despite the hazard ratio being large, because the underlying hazard is so low.

(l)

plot(fpmh,newdata=newdata1,
     type="sdiff", var="agegrp4",
     xlab="Time since diagnosis (years)")

(m)

lapply(1:3, function(i) {
    fitdf <- stpm2(Surv(time,event) ~ sex + year8594 + agegrp2 + agegrp3 + agegrp4,
                   data=melanoma0, tvc=list(agegrp2 = i, agegrp3 = i, agegrp4 = i),
                   df=4)
    data.frame(i,
               aic=AIC(fitdf),
               bic=BIC(fitdf))
}) |> do.call(what=rbind)

##   i      aic      bic
## 1 1 8245.961 8331.486
## 2 2 8244.053 8349.315
## 3 3 8248.623 8373.621

## Plots with different df
fitdf1 <- stpm2(Surv(time,event) ~ sex + year8594 + agegrp2 + agegrp3 + agegrp4,
                data=melanoma0,
                tvc = list(agegrp2 = 1, agegrp3 = 1, agegrp4 = 1), df=4)

plot(fitdf1, newdata = newdata1,
     type="hr", ci=FALSE, log="y", var="agegrp4",
     xlab="Time since diagnosis (years)")
for (i in 2:3 ) {
    fitdf <- stpm2(Surv(time,event) ~ sex + year8594 + agegrp2 + agegrp3 + agegrp4,
                   data=melanoma0, tvc=list(agegrp2 = i, agegrp3 = i, agegrp4 = i),
                   df=4)
    lines(fitdf, newdata=newdata1,
          type="hr", lty=i,
          var="agegrp4")
}
legend("topright", legend=c("1 df", "2 df", "3 df"), lty=1:3)

Biostatistics III in R

Code by Xing-Rong Liu and Mark Clements

Exercise 28. Flexible parametric survival models in R

(a)

(b)

(c)

(d)

(e)

(f)

(g)

(h)

(i)

(j)

(k)

(l)

(m)