This brief demo provides code and output for fitting spatial fusion
models using R package spatialfusion. The first section
analyze the built-in synthetic dataset with INLA implementation while
the second section analyze a simulated dataset with Stan implementation.
The method
argument in fusionData()
function
decides on which implementation to use.
## Loading spatialfusion (version 0.7):
## - The compilation time for a Stan model can be up to 20s.
## - INLA method is recommended for larger datasets (> 1000 observations).
## - It is good practice to test the model specification on sub-sampled dataset first before running it on the full dataset.
## Linking to GEOS 3.13.1, GDAL 3.10.2, PROJ 9.5.1; sf_use_s2() is TRUE
## lungfunction covariate geometry
## Min. :-14.1384 Min. :-2.76510 POINT :200
## 1st Qu.: -2.6764 1st Qu.:-0.68487 epsg:NA : 0
## Median : 1.1040 Median :-0.04320 +proj=long...: 0
## Mean : 0.9074 Mean :-0.05798
## 3rd Qu.: 4.7340 3rd Qu.: 0.73442
## Max. : 17.9710 Max. : 3.20051
## mortality covariate pop mr
## Min. : 0.00 Min. :-2.86427 Min. : 362 Min. : 0.0000
## 1st Qu.: 0.00 1st Qu.:-0.67110 1st Qu.: 1880 1st Qu.: 0.0000
## Median : 1.00 Median :-0.05943 Median : 4431 Median : 0.2741
## Mean : 14.90 Mean :-0.05240 Mean : 9357 Mean : 1.7206
## 3rd Qu.: 6.75 3rd Qu.: 0.52046 3rd Qu.: 7880 3rd Qu.: 1.4923
## Max. :540.00 Max. : 2.07776 Max. :413912 Max. :26.3228
## geometry
## POLYGON :162
## epsg:NA : 0
## +proj=long...: 0
##
##
##
## geometry
## POINT :116
## epsg:NA : 0
## +proj=long...: 0
tm_shape(dataLattice) + tm_polygons(col = "white") +
tm_shape(dataGeo) + tm_dots(size = 0.1) +
tm_add_legend(type = "symbol", shape = 16, size = 0.3, col = "black", label = "geostatistical") +
tm_shape(dataPP) + tm_symbols(col = "red", shape = 4, size = 0.02) +
tm_add_legend(type = "symbol", shape = 4, size = 0.2, col = "red", label = "point pattern") +
tm_layout(main.title = "dataGeo, dataPP", main.title.size = 1,
frame = F, fontface = 2, legend.outside = T)
tm_shape(dataLattice) +
tm_fill(col = "mr", style = "fixed", breaks = c(0, 0.5, 2, 10, 30),
title = "Mortality rate \n(per thousand)", legend.reverse = T) + tm_borders() +
tm_layout(main.title = "dataLattice", main.title.size = 1, frame = F, fontface = 2,
legend.position = c(0.77,0.8), legend.text.size = 0.5, legend.title.size = 0.5)
dat <- fusionData(geo.data = dataGeo, geo.formula = lungfunction ~ covariate,
lattice.data = dataLattice,
lattice.formula = mortality ~ covariate + log(pop),
pp.data = dataPP, distributions = c("normal", "poisson"),
method = "INLA")
dat
## data object for spatial fusion modeling with INLA consisting of:
## - 1 geostatistical variable(s)
## - 1 lattice variable(s)
## - 1 point pattern variable(s)
##
## Provide this object as 'data' argument in fusion() to fit a spatial fusion model.
if (require(INLA)){
mod <- fusion(data = dat, n.latent = 1, bans = matrix(c(0,0,0), ncol = 1),
pp.offset = 400, prior.range = c(0.1, 0.5),
prior.sigma = c(1, 0.5), mesh.locs = dat$locs_point,
mesh.max.edge = c(0.05, 0.5))
}
## Loading required package: INLA
## Loading required package: Matrix
## This is INLA_24.12.11 built 2024-12-11 19:58:26 UTC.
## - See www.r-inla.org/contact-us for how to get help.
## - List available models/likelihoods/etc with inla.list.models()
## - Use inla.doc(<NAME>) to access documentation
if (require(INLA)){
mod_fit <- fitted(mod, type = "link")
oldpar <- par(mfrow = c(1,2))
plot(dataGeo$lungfunction, mod_fit$point1,
xlab = "Observed", ylab = "Fitted")
abline(0,1)
plot(log(dataLattice$mortality), mod_fit$area1,
xlab = "Observed", ylab = "Fitted")
abline(0,1)
par(oldpar)
}
## Model:
## geostatistical formula: lungfunction ~ covariate
## lattice formula: mortality ~ covariate + log(pop)
## point pattern variables: 1
## latent process(es): 1
## --------------
## Fixed effect coefficients:
## mean sd 0.025quant 0.5quant 0.975quant mode
## intercept (beta_p11) 1.02 0.1320 0.737 1.02 1.26 1.04
## covariate (beta_p12) 4.98 0.0554 4.870 4.98 5.09 4.98
## intercept (beta_a11) -8.22 0.2350 -8.690 -8.22 -7.77 -8.22
## covariate (beta_a12) 2.03 0.0484 1.940 2.03 2.13 2.03
## log(pop) (beta_a13) 1.04 0.0219 0.997 1.04 1.08 1.04
##
## Latent parameters:
## mean sd 0.025quant 0.5quant 0.975quant mode
## Gaussian precision 1.750 0.2280 1.350 1.740 2.240 1.720
## Range for s11 0.313 0.1160 0.151 0.292 0.601 0.253
## Stdev for s11 0.824 0.1770 0.537 0.804 1.230 0.761
## Beta for s12 0.439 0.0802 0.283 0.438 0.599 0.434
## Beta for s13 0.914 0.1660 0.593 0.911 1.250 0.903
if (require(INLA)){
pred.locs <- st_as_sf(spsample(as_Spatial(dataDomain), 20000, type = "regular"))
mod.pred <- predict(mod, pred.locs)
mod.pred <- st_set_crs(mod.pred, "+proj=longlat +ellps=WGS84")
tm_shape(mod.pred) +
tm_symbols(col = "latent.s11", shape = 15, size = 0.05, style = "cont",
midpoint = NA, legend.col.reverse = TRUE, palette = "Oranges",
title.col = "Latent process") +
tm_shape(dataLattice) + tm_borders() +
tm_layout(frame = FALSE, legend.outside = TRUE)
}
##
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `symbols()`: instead of `style = "cont"`, use fill.scale =
## `tm_scale_continuous()`.
## ℹ Migrate the argument(s) 'midpoint', 'palette' (rename to 'values') to
## 'tm_scale_continuous(<HERE>)'
## For small multiples, specify a 'tm_scale_' for each multiple, and put them in a
## list: 'fill'.scale = list(<scale1>, <scale2>, ...)'
## [v3->v4] `symbols()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title.col' (rename to 'title') to 'fill.legend =
## tm_legend(<HERE>)'
## [v3->v4] `tm_symbols()`: migrate the argument(s) related to the scale of the
## visual variable `shape` namely 'midpoint' to shape.scale =
## tm_scale_intervals(<HERE>).
## ℹ For small multiples, specify a 'tm_scale_' for each multiple, and put them in
## a list: 'shape.scale = list(<scale1>, <scale2>, ...)'
## [cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Oranges" is named
## "brewer.oranges"
## Multiple palettes called "oranges" found: "brewer.oranges", "matplotlib.oranges". The first one, "brewer.oranges", is returned.
##
## [plot mode] fit legend/component: Some legend items or map compoments do not
## fit well, and are therefore rescaled.
## ℹ Set the tmap option `component.autoscale = FALSE` to disable rescaling.
dat <- fusionSimulate(n.point = 200, n.area = 30, n.grid = 5, n.pred = 100,
psill = 1.5, phi = 1, nugget = 0, tau.sq = 0.2,
dimension = 10, domain = NULL, point.beta = list(rbind(1,5)),
area.beta = list(rbind(1.5, 1.5)), nvar.pp = 1,
distributions = c("normal","poisson"),
design.mat = matrix(c(2, 0.5, 1), ncol = 1),
pp.offset = 0.5, seed = 1)
geo.data <- st_as_sf(data.frame(y = dat$mrf[dat$sample.ind,"x"],
x = dat$mrf[dat$sample.ind,"y"],
cov.point = dat$data$X_point[,2],
outcome =dat$dat$Y_point[[1]]),
coords = c("x","y"),
crs = st_crs("+proj=longlat +ellps=WGS84"))
lattice.data <- cbind(dat$poly,
(data.frame(outcome = dat$data$Y_area[[1]],
cov.area = dat$data$X_area[,2])))
lattice.data <- st_set_crs(lattice.data, "+proj=longlat +ellps=WGS84")
pp.data <- dat$data$lgcp.coords[[1]]
pp.data <- st_set_crs(pp.data, "+proj=longlat +ellps=WGS84")
tm_shape(lattice.data) + tm_polygons(col = "white") +
tm_shape(geo.data) + tm_dots(size = 0.1) +
tm_add_legend(type = "symbol", shape = 16, size = 0.3, col = "black", label = "geostatistical") +
tm_shape(pp.data) + tm_symbols(col = "red", shape = 4, size = 0.02) +
tm_add_legend(type = "symbol", shape = 4, size = 0.2, col = "red", label = "point pattern") +
tm_layout(main.title = "geo.data, pp.data", main.title.size = 1,
frame = FALSE, fontface = 2, legend.outside = TRUE)
## [v3->v4] `tm_add_legend()`: use `type = "symbols"` instead of `type =
## "symbol"`.
## [v3->v4] `tm_add_legend()`: use `fill` instead of `col` for the fill color of
## symbols.
## [v3->v4] `tm_add_legend()`: use `type = "symbols"` instead of `type =
## "symbol"`.
## [v3->v4] `tm_add_legend()`: use `fill` instead of `col` for the fill color of
## symbols.
## [v3->v4] `tm_layout()`: use text.fontface instead of fontface
## [v3->v4] `tm_layout()`: use `tm_title()` instead of `tm_layout(main.title = )`
tm_shape(lattice.data) +
tm_fill(col="outcome", style="fixed", breaks=c(0, 10, 50, 200, 1200),
title = "Outcome", legend.reverse = T) + tm_borders() +
tm_layout(main.title="lattice.data", main.title.size = 1, frame = FALSE, fontface = 2,
legend.position = c(0.77,0.8), legend.text.size = 0.5, legend.title.size = 0.5)
##
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `tm_polygons()`: instead of `style = "fixed"`, use fill.scale =
## `tm_scale_intervals()`.
## ℹ Migrate the argument(s) 'style', 'breaks' to 'tm_scale_intervals(<HERE>)'
## [v3->v4] `tm_polygons()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title', 'legend.reverse' (rename to 'reverse')
## to 'fill.legend = tm_legend(<HERE>)'
## [v3->v4] `tm_layout()`: use text.fontface instead of fontface
## [v3->v4] `tm_layout()`: use `tm_title()` instead of `tm_layout(main.title = )`
dat2 <- fusionData(geo.data = geo.data, geo.formula = outcome ~ cov.point,
lattice.data = lattice.data, lattice.formula = outcome ~ cov.area,
pp.data = pp.data, distributions = c("normal","poisson"),
method = "Stan")
dat2
## data object for spatial fusion modeling with Stan consisting of:
## - 1 geostatistical variable(s), with 200 locations
## - 1 lattice variable(s), with 150 sampling point locations
## - 1 point pattern variable(s), with 100 gridded locations
##
## Provide this object as 'data' argument in fusion() to fit a spatial fusion model.
mod_fit <- fitted(mod, type = "link")
oldpar <- par(mfrow = c(1,2))
plot(dat$data$Y_point[[1]], mod_fit$point1, xlab = "Observed", ylab = "Fitted")
abline(0,1)
plot(log(dat$data$Y_area[[1]]), mod_fit$area1, xlab = "Observed", ylab = "Fitted")
abline(0,1)
## Model:
## geostatistical formula: outcome ~ cov.point
## lattice formula: outcome ~ cov.area
## point pattern variables: 1
## latent process(es): 1
## --------------
## Fixed effect coefficients:
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## intercept (beta_p[1,1]) 1.4 0.0190 0.23 0.93 1.2 1.4 1.6 1.8 140 1
## cov.point (beta_p[1,2]) 4.9 0.0170 0.14 4.60 4.8 4.9 5.0 5.2 74 1
## intercept (beta_a[1,1]) 1.5 0.0069 0.13 1.20 1.4 1.5 1.5 1.7 360 1
## cov.area (beta_a[1,2]) 1.6 0.0074 0.13 1.40 1.6 1.6 1.7 1.9 290 1
##
## Latent parameters:
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## tau_sq[1] 0.42 0.061 0.24 0.170 0.26 0.35 0.49 1.10 16 1.1
## phi[1] 0.51 0.011 0.11 0.330 0.43 0.49 0.55 0.78 100 1.0
## Z_1[1] 2.10 0.021 0.15 1.800 2.00 2.10 2.20 2.40 48 1.1
## Z_2[1] 0.40 0.018 0.23 0.009 0.26 0.39 0.53 0.88 160 1.0
## Z_3[1] 1.40 0.015 0.20 1.100 1.30 1.40 1.50 1.80 170 1.0
## Enter <return> to proceed ...