Preamble

This tutorial demonstrates how to fit the TTR model to time series data using the R package TTR.PGM. It assumes the user is familiar with R. The tutorial is designed to accompany the manuscript “TTR.PGM: A R package for modelling the distributions and dynamics of plants using the Thornley transport resistance plant growth model”. That manuscript and the TTR.PGM help pages provide additional information.

Setting up

If necessary, install the TTR.PGM package, e.g.

 install.packages("TTR.PGM_1.0.0.tar.gz",repos=NULL, type="source")
 setwd(tempdir())

Load the required packages.

library(LaplacesDemon)
library(DEoptim)
library(TTR.PGM)

Define the statistical model to be fitted

The model is written in the LaplacesDemon style, which provides a predictable structure. In this case the model is set up as a state-space model.

    model_ssm <- function(parm, Data)
    {
        ### parameters
        ttr_parm <- TTR.PGM::get_parms(parm, Data)
        parm <- TTR.PGM::interval_parms(parm,Data)
        names(parm)<-Data$parm.names

        ### Process model
        x <- TTR.PGM::run_ttr(ttr_parm,Data)
        
        ### Log-likelihood
        steps <- Data$options$steps
        Uc <-  x["Uc",,steps,,drop=T] 
        mu_sif <- Uc*parm["b1_sif"] + parm["b0_sif"]
        oe_sif <- parm["oe_sif_0"] + parm["oe_sif_1"]*Data$ymat[,"q"] 
        LL_sif <- sum( dnorm(Data$y[,"sif"], mu_sif, oe_sif, log = TRUE),na.rm=T)
        LL <- LL_sif 
        yhat <- c(rnorm(Data$n.time,mu_sif,oe_sif) )
        
        ### Log-Prior
        lo <- Data$bounds[,"lower"]
        up <- Data$bounds[,"upper"]
        bound.mu <- (lo+up)/2
        bound.sd <- abs(lo-up)/50
        prior_iM <- dunif( parm["iM"],lo["iM"],up["iM"],log=TRUE)
        prior_oe <- dnorm(parm["oe_sif_0"],0,1,log=TRUE)+
                    dnorm(parm["oe_sif_1"],0,1,log=TRUE)
        prior_pe <- sum(dgamma(parm[Data$pos.pe],3,10,log=TRUE))
        prior_scale <- dnorm(parm["b0_sif"],0,10,log=TRUE)  +
                          dnorm(parm["b1_sif"],0,100,log=TRUE) 
        prior_ttr_env <- sum(dnorm( parm[Data$pos.env],
                                mean = bound.mu[Data$pos.env],
                                sd = bound.sd[Data$pos.env],log=TRUE) ) 
        priors <- prior_iM + 
                    prior_oe + 
                    prior_pe + 
                    prior_scale +
                    prior_ttr_env  
                    
        ### Log-Posterior
        LP <- LL + priors 

        ### Return
        if (Data$options$optim == "DEoptim")
        return(-1 * LP) 
        if (Data$options$optim == "LaplacesDemon") 
            return(list(LP = LP, Dev = -2 * LL, Monitor = LP, yhat = yhat, parm = as.numeric(parm)))
        if (Data$options$optim == "no")
            return(list(sif = yhat))
    }

Format the response and forcing data

Load a data.frame that contains the observational and environmental data for the time series. A prepared data set for a savanna site in Burkina Faso is used in this example (see help(data_input_time_SA_BFA_BNP)). The manuscript contains a brief description of how this data set was assembled.

    site.name <- "SA_BFA_BNP"
    data(data_input_time_SA_BFA_BNP)
    ttr.time.in <- data_input_time_SA_BFA_BNP

Now use the get_input function to format ttr.time.in for the TTR.PGM package. For more detailed information on options see help(get_input).

    seconds.week = 60*60*24*7
    seconds.day = 60*60*24
    input <- with(ttr.time.in, TTR.PGM::get_input(observations = data.frame(sif=sif*100),
                                       lon = 1,
                                       lat = 1,
                                       tnur = tsoil,
                                       tcur = tair-273.15,
                                       tgrowth = tair-273.15,
                                       tloss = tair-273.15,
                                       seconds = seconds.week,
                                       p3=p3, p4=p4,
                                       catm = CA,
                                       rsds = srad/seconds.day,
                                       swc = msoil*100))
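
It can be useful to inspect the structure of the formatted input object before proceeding, for example:

    str(input, max.level = 1)
    dim(input$timeseries)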

Specify the model fitting options

Specify some options (see help(standard_options)). In this case the “oak” variant is selected, and process error and initial plant size are set to be estimated.

    time_options <- standard_options
    time_options$initial_mass_par <- TRUE
    time_options$pe <- TRUE
    time_options$ve <- "oak"
    time_options$steps <- 1:dim(input$timeseries)[2]
    time_options$optim <- "DEoptim"

Set the bounds; if these are not set the defaults are used. See help(bounds) for more information. If the statistical model requires additional parameters that are not in the defaults, they need to be defined here with their bounds. In this example, the oak model is simplified by removing some environmental factors by setting their bounds to NA.

    max.swc <- max(input$timeseries["swc",,1])
    bounds = list(alpha = list(
                    oe_sif_0 = c(0,100),
                    oe_sif_1 = c(0,100),
                    b1_sif = c(0,5000), 
                    b0_sif = c(-1000,1000)
                    ),   
                  beta = list(
                    CU_Ns_1 = c(NA,NA),
                    CU_Ns_2 = c(NA,NA),
                    CU_swc_1 = c(0,max.swc),
                    CU_swc_2 = c(0,100),
                    g_swc_1 = c(0,max.swc),
                    g_swc_2 = c(0,100),
                    g_tgrowth_1 = c(0,50),
                    g_tgrowth_2 = c(0,50),
                    g_tgrowth_3 = c(0,50),
                    g_tgrowth_4 = c(0,50),
                    L_mix_1 = c(50,100),
                    L_mix_2 = c(0,100),                    
                    L_mix_3 = c(0,100),                    
                    L_tloss_1 = c(-20,50),
                    L_tloss_2 = c(0,50),
                    L_swc_1 = c(0,max.swc),
                    L_swc_2 = c(0,100),
                    NU_Nsoil_1 = c(NA,NA),
                    NU_Nsoil_2 = c(NA,NA),
                    NU_swc_1 = c(0,max.swc),
                    NU_swc_2 = c(0,100),
                    NU_swc_3 = c(0,100),
                    NU_swc_4 = c(0,100),
                    NU_tnur_1 = c(0,50),
                    NU_tnur_2 = c(0,50),
                    pe_Ms = c(0,1),
                    pe_Mr = c(0,1),
                    pe_Cs = c(0,1),
                    pe_Cr = c(0,1),
                    pe_Ns = c(0,1),
                    pe_Nr = c(0,1),
                    iM = c(1,100)
                    ) 
                    )

Set some of the global parameters. See help(standard_globals).

    time_globals <- standard_globals
    time_globals["gmax"] = 50    
    time_globals["mmax"] = 0.010   
    time_globals["KM"] = 0.5 
    time_globals["KA"] = 50 

Using input, time_options, bounds and time_globals make the object needed to run and fit the model. See help(make_data).

    MyData <- TTR.PGM::make_data(input,options=time_options,bounds=bounds, globals=time_globals)

Add an extra field (the MODIS data quality flag, q) to the data structure; this field is needed by model_ssm.

    MyData$ymat <- data.frame(y = MyData$y, q = ttr.time.in$q)

Add some other fields that are needed by model_ssm: the positions of the process-error and environmental-response parameters in the parameter vector.

    MyData$pos.pe <- grep("^pe",MyData$parm.names)
    MyData$pos.env <- c(grep("^CU_",MyData$parm.names),
                        grep("^NU_",MyData$parm.names),
                        grep("^g_",MyData$parm.names),
                        grep("^L_",MyData$parm.names))

Fit using DEoptim

Fit the model using DEoptim. See help(DEoptim) for how to use DEoptim. This will take several minutes depending on the DEoptim settings selected and the size of MyData. For production analyses the DEoptim documentation should be followed to make sensible decisions on which values to use for deoptim_pop and deoptim_iter.

    set.seed(2024)
    MyData$options$optim <- "DEoptim"
    deoptim_iter = 1000
    deoptim_trace = 50
    deoptim_pop = 50
    ft.DE<-DEoptim(model_ssm, upper=MyData$bounds[,"upper"],
                            lower=MyData$bounds[,"lower"],
                            control=DEoptim.control(
                            NP = deoptim_pop,
                            itermax = deoptim_iter,
                            trace = deoptim_trace,
                            CR=0.9, F=0.8, c=0.0,
                            initialpop = NULL,
                            strategy = 2,
                            parallelType=0,
                            parVar=c("MyData"),
                            packages = list("LaplacesDemon","TTR.PGM")),
                            Data=MyData)
## Warning in DEoptim(model_ssm, upper = MyData$bounds[, "upper"], lower = MyData$bounds[, : For many problems it is best to set 'NP' (in 'control') to be at least ten times the length of the parameter vector.
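
Before moving on, you can inspect the DEoptim solution; the lowest negative log-posterior found and the corresponding parameter vector are stored in the optim component of the returned object.

    ft.DE$optim$bestval        # minimised negative log-posterior
    head(ft.DE$optim$bestmem)  # best parameter vector (first few values)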

If required, you can save ft.DE and MyData.

    saveRDS(ft.DE,paste0("ft.DE_",site.name,".rds"))                        
    saveRDS(MyData,paste0("MyData_",site.name,".rds"))     

Fit using LaplacesDemon

Fit the model using LaplacesDemon. See help(LaplacesDemon) for how to use LaplacesDemon. This will take several minutes depending on the LaplacesDemon settings selected and the size of MyData. In this example the DEoptim solution is used to define the Initial.Values passed to LaplacesDemon. Multiple chains are then run in series, with the proposal covariance matrix from each chain used to start the next.

    ft.DE <- readRDS(paste0("ft.DE_",site.name,".rds"))
    MyData <- readRDS(paste0("MyData_",site.name,".rds"))                        
    iter<-2e4 
    status<-iter/100 
    thin<-10
    MyData$options$optim <- "LaplacesDemon"

    Initial.Values <- as.numeric(ft.DE$optim$bestmem)
    ft.LD.DRAM <- LaplacesDemon(model_ssm, Data=MyData, Initial.Values,
        Covar=NULL, Iterations=iter, Status=status, Thinning=thin,
        Algorithm="DRAM", Specs=list(Adaptive=iter/10, Periodicity=10))

    Initial.Values <- as.initial.values(ft.LD.DRAM)
    ft.LD.DRAM <- LaplacesDemon(model_ssm, Data=MyData, Initial.Values,
        Covar=ft.LD.DRAM$Covar, Iterations=iter, Status=status, Thinning=thin,
        Algorithm="DRAM", Specs=list(Adaptive=iter/10, Periodicity=10))

Use the LaplacesDemon plot method to view the MCMC chains. The chains can be useful diagnostic tools to assess model convergence. Save the output.

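The plotting and saving code is not echoed here; a minimal sketch, using the plot method for the fitted object and saveRDS with an illustrative file name, is:

    ### view the chains and marginal posteriors on screen
    plot(ft.LD.DRAM, BurnIn = 0, MyData, PDF = FALSE)
    ### save the fitted object
    saveRDS(ft.LD.DRAM, paste0("ft.LD.DRAM_", site.name, ".rds"))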

Define some functions for summarising the output

Functions for calculating maximum a posteriori (MAP) summaries of the posterior samples.

    myMAPmean <- function(ft, MyData, q = 0.9) {
        LP <- ft$Monitor[,"LP"]
        n <- length(LP)
        keep <- floor(n/2):n  # discard the first half of the chain as burn-in
        wLP <- keep[LP[keep] > quantile(LP[keep], q)]
        MAPmeans <- apply(ft$Posterior1[wLP, MyData$parm.names], 2, mean)
        return(MAPmeans)
    }
    myMAPquantile <- function(ft, MyData, q = c(0.25, 0.75)) {
        LP <- ft$Monitor[,"LP"]
        n <- length(LP)
        keep <- floor(n/2):n  # discard the first half of the chain as burn-in
        wLP <- keep[LP[keep] > quantile(LP[keep], q[1]) &
                    LP[keep] < quantile(LP[keep], q[2])]
        inQ <- ft$Posterior1[wLP, MyData$parm.names]
        return(inQ)
    }
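
For example, MAP parameter means from the fitted chain can be obtained with:

    parm.map <- myMAPmean(ft.LD.DRAM, MyData)
    round(parm.map, 3)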

Functions for calculating the RMSE and R² values.

    calc_rmse <- function(observed, modelled) {
        rmse <- sqrt(mean((observed - modelled)^2))
        return(rmse)
    }
    calc_r_squared <- function(observed, modelled) {
        ss_total <- sum((observed - mean(observed))^2)
        ss_residual <- sum((observed - modelled)^2)
        rsq <- 1 - (ss_residual / ss_total)
        return(rsq)
    }

Propagate error in projections

To propagate parameter uncertainty into the predictions, draw parameter sets from the posterior samples (here those with log-posterior values between the 1% and 99% quantiles), run the model for each draw, and summarise the resulting predictions as a mean and a 95% interval.

    parm.m <-myMAPquantile(ft.LD.DRAM,MyData,q=c(0.01,0.99))
    sif.mat <- t(apply(parm.m, 1, function(x) {predict_ttr(x, model_ssm, MyData)$sif}))
    pred.lo <- apply(sif.mat,2,quantile,probs=0.025)    
    pred.up <- apply(sif.mat,2,quantile,probs=0.975)    
    pred.mean <- apply(sif.mat,2,mean)    

Plot the model fit

First plot the data and model predictions as a time series.

    oldpar <- par(mar=c(4,4,2,4))
    mycol <- c("#92e2ff","#ff1f11","#377eb8")
    x <- ttr.time.in$date
    yup <- 100
    plot(x,MyData$y[,"sif"],type="p",ylim=c(0,yup),ylab="",xlab="",main=site.name,
         axes=FALSE, pch=19, cex=0.5)
    axis(2,at=seq(0,yup,length=5),labels=seq(0,yup,length=5))
    polygon(c(x,rev(x)),c(pred.lo,rev(pred.up)),
            col=adjustcolor(mycol[1], alpha.f = 0.6), border=mycol[1])
    points(x,MyData$y[,"sif"],pch=19,cex=0.5)
    lines(x,pred.mean,col=mycol[2],lwd=2.0,lty=1)
    axis.Date(1,at = seq(as.Date("2000/01/1"),as.Date("2021/12/1"), "years"), line=0)
    mtext("SIF", side=2, line=3, adj=0.5, cex=1.0, col="black", outer=FALSE)

    par(oldpar)

Now plot the data versus model predictions.

 plot(MyData$y[,"sif"],pred.mean,pch=19,col=mycol[1],
      ylab="Predicted SIF",xlab="Observed SIF")
 abline(a=0,b=1,lwd=2)
 abline(lm(pred.mean~MyData$y[,"sif"]),lty=2,lwd=2)
 legend("topleft",bty="n",lty=c(1,2),legend=c("1:1 line","regression line"))

Calculate some goodness of fit statistics.

    calc_r_squared(MyData$y[,"sif"],pred.mean)
    ## [1] 0.9038121
    calc_rmse(MyData$y[,"sif"],pred.mean)
    ## [1] 6.924631

Plot the figures as pdf files

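The code used to write the figures is not echoed here; a minimal sketch, repeating the observed-versus-predicted plot from above inside a pdf device with an illustrative file name, is:

    pdf(paste0("fit_scatter_", site.name, ".pdf"), width = 5, height = 5)
    plot(MyData$y[,"sif"], pred.mean, pch = 19, col = mycol[1],
         ylab = "Predicted SIF", xlab = "Observed SIF")
    abline(a = 0, b = 1, lwd = 2)
    dev.off()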