spatialfusion: short demo

Craig Wang (craig.wang@math.uzh.ch)

June 21, 2019

This brief demo provides code and output for fitting spatial fusion models using R package spatialfusion. The first section analyze the built-in synthetic dataset with INLA implementation while the second section analyze a simulated dataset with Stan implementation. The method argument in fusionData() function decides on which implementation to use.

1. Spatial fusion modelling with INLA on built-in synthetic data

Load libraries

library(spatialfusion)
## Loading spatialfusion (version 0.7):
## - The compilation time for a Stan model can be up to 20s.
## - INLA method is recommended for larger datasets (> 1000 observations).
## - It is good practice to test the model specification on sub-sampled dataset first before running it on the full dataset.
library(tmap, quietly = TRUE)
library(sp, quietly = TRUE)
library(sf, quietly = TRUE)
## Linking to GEOS 3.13.1, GDAL 3.10.2, PROJ 9.5.1; sf_use_s2() is TRUE

Load and view built-in synthetic data

summary(dataGeo)
##   lungfunction        covariate                 geometry  
##  Min.   :-14.1384   Min.   :-2.76510   POINT        :200  
##  1st Qu.: -2.6764   1st Qu.:-0.68487   epsg:NA      :  0  
##  Median :  1.1040   Median :-0.04320   +proj=long...:  0  
##  Mean   :  0.9074   Mean   :-0.05798                      
##  3rd Qu.:  4.7340   3rd Qu.: 0.73442                      
##  Max.   : 17.9710   Max.   : 3.20051
summary(dataLattice)
##    mortality        covariate             pop               mr         
##  Min.   :  0.00   Min.   :-2.86427   Min.   :   362   Min.   : 0.0000  
##  1st Qu.:  0.00   1st Qu.:-0.67110   1st Qu.:  1880   1st Qu.: 0.0000  
##  Median :  1.00   Median :-0.05943   Median :  4431   Median : 0.2741  
##  Mean   : 14.90   Mean   :-0.05240   Mean   :  9357   Mean   : 1.7206  
##  3rd Qu.:  6.75   3rd Qu.: 0.52046   3rd Qu.:  7880   3rd Qu.: 1.4923  
##  Max.   :540.00   Max.   : 2.07776   Max.   :413912   Max.   :26.3228  
##           geometry  
##  POLYGON      :162  
##  epsg:NA      :  0  
##  +proj=long...:  0  
##                     
##                     
## 
summary(dataPP)
##           geometry  
##  POINT        :116  
##  epsg:NA      :  0  
##  +proj=long...:  0

Plot data

tm_shape(dataLattice) + tm_polygons(col = "white") +
  tm_shape(dataGeo) + tm_dots(size = 0.1) +
  tm_add_legend(type = "symbol", shape = 16, size = 0.3, col = "black", label = "geostatistical") +
  tm_shape(dataPP) + tm_symbols(col = "red", shape = 4, size = 0.02) +  
  tm_add_legend(type = "symbol", shape = 4, size = 0.2, col = "red", label = "point pattern") +
  tm_layout(main.title = "dataGeo, dataPP", main.title.size = 1, 
            frame = F, fontface = 2, legend.outside = T)

tm_shape(dataLattice) +
  tm_fill(col = "mr", style = "fixed", breaks = c(0, 0.5, 2, 10, 30),
          title = "Mortality rate \n(per thousand)", legend.reverse = T) + tm_borders() +
  tm_layout(main.title = "dataLattice", main.title.size = 1, frame = F, fontface = 2,
            legend.position = c(0.77,0.8), legend.text.size = 0.5, legend.title.size = 0.5)

Data preparation

dat <- fusionData(geo.data = dataGeo, geo.formula = lungfunction ~ covariate,
                  lattice.data = dataLattice,
                  lattice.formula = mortality ~ covariate + log(pop),
                  pp.data = dataPP, distributions = c("normal", "poisson"),
                  method = "INLA")

dat
## data object for spatial fusion modeling with INLA consisting of: 
## - 1 geostatistical variable(s)
## - 1 lattice variable(s)
## - 1 point pattern variable(s)
## 
## Provide this object as 'data' argument in fusion() to fit a spatial fusion model.

Fit a spatial fusion model

if (require(INLA)){
mod <- fusion(data = dat, n.latent = 1, bans = matrix(c(0,0,0), ncol = 1),
              pp.offset = 400, prior.range = c(0.1, 0.5),
              prior.sigma = c(1, 0.5),  mesh.locs = dat$locs_point,
              mesh.max.edge = c(0.05, 0.5))
}
## Loading required package: INLA
## Loading required package: Matrix
## This is INLA_24.12.11 built 2024-12-11 19:58:26 UTC.
##  - See www.r-inla.org/contact-us for how to get help.
##  - List available models/likelihoods/etc with inla.list.models()
##  - Use inla.doc(<NAME>) to access documentation

Inspect the fit

if (require(INLA)){
mod_fit <- fitted(mod, type = "link")
oldpar <- par(mfrow = c(1,2))
plot(dataGeo$lungfunction, mod_fit$point1,
     xlab = "Observed", ylab = "Fitted")
abline(0,1)
plot(log(dataLattice$mortality), mod_fit$area1,
     xlab = "Observed", ylab = "Fitted")
abline(0,1)
par(oldpar)
}

Check parameter estimates

if (require(INLA)){
summary(mod, digits = 3)
}
## Model:
## geostatistical formula: lungfunction ~ covariate 
## lattice formula: mortality ~ covariate + log(pop) 
## point pattern variables: 1 
## latent process(es): 1 
## --------------
## Fixed effect coefficients:
##                       mean     sd 0.025quant 0.5quant 0.975quant  mode
## intercept (beta_p11)  1.02 0.1320      0.737     1.02       1.26  1.04
## covariate (beta_p12)  4.98 0.0554      4.870     4.98       5.09  4.98
## intercept (beta_a11) -8.22 0.2350     -8.690    -8.22      -7.77 -8.22
## covariate (beta_a12)  2.03 0.0484      1.940     2.03       2.13  2.03
## log(pop) (beta_a13)   1.04 0.0219      0.997     1.04       1.08  1.04
## 
## Latent parameters:
##                     mean     sd 0.025quant 0.5quant 0.975quant  mode
## Gaussian precision 1.750 0.2280      1.350    1.740      2.240 1.720
## Range for s11      0.313 0.1160      0.151    0.292      0.601 0.253
## Stdev for s11      0.824 0.1770      0.537    0.804      1.230 0.761
## Beta for s12       0.439 0.0802      0.283    0.438      0.599 0.434
## Beta for s13       0.914 0.1660      0.593    0.911      1.250 0.903

Diagnostic plots

if (require(INLA)){
plot(mod, interactive = FALSE)
}

if (require(INLA)){
plot(mod, posterior = FALSE)
}

Predict latent surface

if (require(INLA)){
pred.locs <- st_as_sf(spsample(as_Spatial(dataDomain), 20000, type = "regular"))
mod.pred <- predict(mod, pred.locs)
mod.pred <- st_set_crs(mod.pred, "+proj=longlat +ellps=WGS84")
tm_shape(mod.pred) +
  tm_symbols(col = "latent.s11", shape = 15, size = 0.05, style = "cont",
             midpoint = NA, legend.col.reverse = TRUE, palette = "Oranges",
             title.col = "Latent process") +
  tm_shape(dataLattice) + tm_borders() +
  tm_layout(frame = FALSE, legend.outside = TRUE)
}
## 
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `symbols()`: instead of `style = "cont"`, use fill.scale =
## `tm_scale_continuous()`.
## ℹ Migrate the argument(s) 'midpoint', 'palette' (rename to 'values') to
##   'tm_scale_continuous(<HERE>)'
## For small multiples, specify a 'tm_scale_' for each multiple, and put them in a
## list: 'fill'.scale = list(<scale1>, <scale2>, ...)'
## [v3->v4] `symbols()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title.col' (rename to 'title') to 'fill.legend =
## tm_legend(<HERE>)'
## [v3->v4] `tm_symbols()`: migrate the argument(s) related to the scale of the
## visual variable `shape` namely 'midpoint' to shape.scale =
## tm_scale_intervals(<HERE>).
## ℹ For small multiples, specify a 'tm_scale_' for each multiple, and put them in
##   a list: 'shape.scale = list(<scale1>, <scale2>, ...)'
## [cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Oranges" is named
## "brewer.oranges"
## Multiple palettes called "oranges" found: "brewer.oranges", "matplotlib.oranges". The first one, "brewer.oranges", is returned.
## 
## [plot mode] fit legend/component: Some legend items or map compoments do not
## fit well, and are therefore rescaled.
## ℹ Set the tmap option `component.autoscale = FALSE` to disable rescaling.

2. Spatial fusion modelling with Stan on simulated data

Simulate data

dat <- fusionSimulate(n.point = 200, n.area = 30, n.grid = 5, n.pred = 100,
                      psill = 1.5, phi = 1, nugget = 0, tau.sq = 0.2,
                      dimension = 10, domain = NULL, point.beta = list(rbind(1,5)),
                      area.beta = list(rbind(1.5, 1.5)), nvar.pp = 1,
                      distributions = c("normal","poisson"),
                      design.mat = matrix(c(2, 0.5, 1), ncol = 1), 
                      pp.offset = 0.5, seed = 1)

geo.data <- st_as_sf(data.frame(y = dat$mrf[dat$sample.ind,"x"],
                       x = dat$mrf[dat$sample.ind,"y"],
                       cov.point = dat$data$X_point[,2],
                       outcome =dat$dat$Y_point[[1]]), 
                     coords = c("x","y"),
                     crs = st_crs("+proj=longlat +ellps=WGS84"))
            
                               
lattice.data <- cbind(dat$poly,
                     (data.frame(outcome = dat$data$Y_area[[1]],
                                 cov.area = dat$data$X_area[,2])))
lattice.data <- st_set_crs(lattice.data, "+proj=longlat +ellps=WGS84")

pp.data <- dat$data$lgcp.coords[[1]]
pp.data <- st_set_crs(pp.data, "+proj=longlat +ellps=WGS84")

Plot data

tm_shape(lattice.data) + tm_polygons(col = "white") + 
  tm_shape(geo.data) + tm_dots(size = 0.1) +
  tm_add_legend(type = "symbol", shape = 16, size = 0.3, col = "black", label = "geostatistical") +
  tm_shape(pp.data) + tm_symbols(col = "red", shape = 4, size = 0.02) +
  tm_add_legend(type = "symbol", shape = 4, size = 0.2, col = "red", label = "point pattern") +
  tm_layout(main.title = "geo.data, pp.data", main.title.size = 1, 
            frame = FALSE, fontface = 2, legend.outside = TRUE)
## [v3->v4] `tm_add_legend()`: use `type = "symbols"` instead of `type =
## "symbol"`.
## [v3->v4] `tm_add_legend()`: use `fill` instead of `col` for the fill color of
## symbols.
## [v3->v4] `tm_add_legend()`: use `type = "symbols"` instead of `type =
## "symbol"`.
## [v3->v4] `tm_add_legend()`: use `fill` instead of `col` for the fill color of
## symbols.
## [v3->v4] `tm_layout()`: use text.fontface instead of fontface
## [v3->v4] `tm_layout()`: use `tm_title()` instead of `tm_layout(main.title = )`

tm_shape(lattice.data) + 
  tm_fill(col="outcome", style="fixed", breaks=c(0, 10, 50, 200, 1200),
          title = "Outcome", legend.reverse = T) + tm_borders() +
  tm_layout(main.title="lattice.data", main.title.size = 1, frame = FALSE, fontface = 2,
            legend.position = c(0.77,0.8), legend.text.size = 0.5, legend.title.size = 0.5)
## 
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `tm_polygons()`: instead of `style = "fixed"`, use fill.scale =
## `tm_scale_intervals()`.
## ℹ Migrate the argument(s) 'style', 'breaks' to 'tm_scale_intervals(<HERE>)'
## [v3->v4] `tm_polygons()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title', 'legend.reverse' (rename to 'reverse')
## to 'fill.legend = tm_legend(<HERE>)'
## [v3->v4] `tm_layout()`: use text.fontface instead of fontface
## [v3->v4] `tm_layout()`: use `tm_title()` instead of `tm_layout(main.title = )`

Data preparation

dat2 <- fusionData(geo.data = geo.data, geo.formula = outcome ~ cov.point,
                   lattice.data = lattice.data, lattice.formula = outcome ~ cov.area,
                   pp.data = pp.data, distributions = c("normal","poisson"), 
                   method = "Stan")
dat2
## data object for spatial fusion modeling with Stan consisting of: 
## - 1 geostatistical variable(s), with 200 locations 
## - 1 lattice variable(s), with 150 sampling point locations 
## - 1 point pattern variable(s), with 100 gridded locations 
## 
## Provide this object as 'data' argument in fusion() to fit a spatial fusion model.

Fit a spatial fusion model

mod <- fusion(data = dat2, n.latent = 1, bans = matrix(c(0,0,0), ncol = 1),
              pp.offset = 0.5, prior.phi = list(distr = "normal", pars = c(1, 1)),
              nsamples = 1000, nburnin = 500, nchain = 1, ncore = 1)

Inspect the fit

mod_fit <- fitted(mod, type = "link")
oldpar <- par(mfrow = c(1,2))
plot(dat$data$Y_point[[1]], mod_fit$point1, xlab = "Observed", ylab = "Fitted")
abline(0,1)
plot(log(dat$data$Y_area[[1]]), mod_fit$area1, xlab = "Observed", ylab = "Fitted")
abline(0,1)

par(oldpar)

Check parameter estimates

summary(mod, digits = 2) 
## Model:
## geostatistical formula: outcome ~ cov.point 
## lattice formula: outcome ~ cov.area 
## point pattern variables: 1 
## latent process(es): 1 
## --------------
## Fixed effect coefficients:
##                         mean se_mean   sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## intercept (beta_p[1,1])  1.4  0.0190 0.23 0.93 1.2 1.4 1.6   1.8   140    1
## cov.point (beta_p[1,2])  4.9  0.0170 0.14 4.60 4.8 4.9 5.0   5.2    74    1
## intercept (beta_a[1,1])  1.5  0.0069 0.13 1.20 1.4 1.5 1.5   1.7   360    1
## cov.area (beta_a[1,2])   1.6  0.0074 0.13 1.40 1.6 1.6 1.7   1.9   290    1
## 
## Latent parameters:
##           mean se_mean   sd  2.5%  25%  50%  75% 97.5% n_eff Rhat
## tau_sq[1] 0.42   0.061 0.24 0.170 0.26 0.35 0.49  1.10    16  1.1
## phi[1]    0.51   0.011 0.11 0.330 0.43 0.49 0.55  0.78   100  1.0
## Z_1[1]    2.10   0.021 0.15 1.800 2.00 2.10 2.20  2.40    48  1.1
## Z_2[1]    0.40   0.018 0.23 0.009 0.26 0.39 0.53  0.88   160  1.0
## Z_3[1]    1.40   0.015 0.20 1.100 1.30 1.40 1.50  1.80   170  1.0

Diagnostic plots

plot(mod, interactive = FALSE)

plot(mod, posterior = FALSE) 

## Enter <return> to proceed ...

Predict latent surface and compare with simulated truth

mod.pred <- predict(mod, new.locs = dat$pred.loc, type = "summary")
oldpar <- par(mfrow = c(1,1))
plot(dat$mrf[dat$pred.ind, c("sim1")], mod.pred$latent1, 
     xlab = "Truth", ylab = "Predicted")
abline(0,1)

par(oldpar)