Improving Model Fitting Utilizing Geographical Complexity in Spatial Regression Tasks
library(sf)
## Linking to GEOS 3.12.1, GDAL 3.8.4, PROJ 9.3.1; sf_use_s2() is TRUE
library(tidyverse)
## ── Attaching core tidyverse packages ───────────────────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ─────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(geocomplexity)
econineq = sf::read_sf(system.file('extdata/econineq.gpkg',package = 'geocomplexity'))
wt1 = sdsfun::spdep_contiguity_swm(econineq,queen = TRUE,style = 'B')
Incorporating the geographical complexity of each independent variable into the model’s explanatory variables
mlr1 = lm(Gini ~ ., data = st_drop_geometry(econineq))
gc1 = geocd_vector(dplyr::select(econineq,-Gini),
wt = wt1, returnsf = FALSE)
mlr2 = lm(Gini ~ ., data = st_drop_geometry(econineq) %>%
dplyr::bind_cols(gc1))
eval_mlr = \(models,mnames){
R2 = purrr::map_dbl(models,\(m) summary(m)$r.squared)
AdjR2 = purrr::map_dbl(models,\(m) summary(m)$adj.r.squared)
t_eval = tibble::tibble(Model = mnames,
R2,AdjR2)
return(t_eval)
}
eval_mlr(list(mlr1,mlr2),
c("MLR","GCMLR")) |>
pander::pander()
Model | R2 | AdjR2 |
---|---|---|
MLR | 0.5846 | 0.5743 |
GCMLR | 0.6215 | 0.6024 |
Using spatial weight matrices that account for geographical complexity in spatial regression
library(spatialreg)
## Loading required package: spData
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
wtl1 = spdep::mat2listw(wt1)
## Warning in spdep::mat2listw(wt1): style is M (missing); style should be set to a valid value
## Warning in spdep::mat2listw(wt1): neighbour object has 2 sub-graphs
wt2 = geocs_swm(econineq,wt1,style = "W")
wtl2 = spdep::mat2listw(wt2,zero.policy = TRUE)
## Warning in spdep::mat2listw(wt2, zero.policy = TRUE): style is M (missing); style should be
## set to a valid value
slm1 = lagsarlm(Gini ~ ., data = st_drop_geometry(econineq),
listw = wtl1, Durbin = FALSE)
slm2 = lagsarlm(Gini ~ ., data = st_drop_geometry(econineq),
listw = wtl2, Durbin = FALSE)
sem1 = errorsarlm(Gini ~ ., data = st_drop_geometry(econineq),
listw = wtl1, Durbin = FALSE)
sem2 = errorsarlm(Gini ~ ., data = st_drop_geometry(econineq),
listw = wtl2, Durbin = FALSE)
eval_slm = \(models,mnames){
evt = tibble(Model = mnames,
AIC = purrr::map_dbl(models,AIC),
BIC = purrr::map_dbl(models,BIC),
logLik = purrr::map_dbl(models,logLik))
return(evt)
}
eval_slm(list(slm1,slm2,sem1,sem2),
c("SLM","GCSLM","SEM","GCSEM")) |>
pander::pander()
Model | AIC | BIC | logLik |
---|---|---|---|
SLM | -1355 | -1313 | 688.6 |
GCSLM | -1363 | -1321 | 692.6 |
SEM | -1468 | -1426 | 745 |
GCSEM | -1364 | -1322 | 693.1 |
geographical complexity-geographically weighted regression
g1 = GWmodel3::gwr_basic(
formula = Gini ~ .,
data = econineq,
bw = "AIC",
adaptive = TRUE
)
## Warning: st_centroid assumes attributes are constant over geometries
g1
## Geographically Weighted Regression Model
## ========================================
## Formula: Gini ~ .
## Data: econineq
## Kernel: gaussian
## Bandwidth: 28 (Nearest Neighbours) (Optimized accroding to AIC)
##
##
## Summary of Coefficient Estimates
## --------------------------------
## Coefficient Min. 1st Qu. Median 3rd Qu. Max.
## Intercept -0.373 0.209 0.347 0.424 0.565
## Induscale -0.342 -0.239 -0.193 -0.147 -0.006
## IT -0.004 -0.003 -0.002 -0.002 0.004
## Income -0.000 0.000 0.000 0.000 0.000
## Sexrat -0.056 0.012 0.052 0.100 0.200
## Houseown 0.001 0.002 0.003 0.004 0.005
## Indemp -0.000 -0.000 -0.000 -0.000 -0.000
## Indcom 0.000 0.000 0.000 0.000 0.000
## Hiedu 0.000 0.002 0.002 0.002 0.004
##
##
## Diagnostic Information
## ----------------------
## RSS: 0.1482732
## ENP: 79.00112
## EDF: 253.9989
## R2: 0.8025199
## R2adj: 0.740855
## AIC: -1559.969
## AICc: -1460.302
g2 = gwr_geoc(formula = Gini ~ .,
data = econineq,
bw = "AIC",
adaptive = TRUE)
g2
## Geographical Complexity-Geographically Weighted Regression Model
## ================================================================
## Kernel: gaussian
## Bandwidth: 16 (Nearest Neighbours) (Optimized according to AIC)
## Alpha: 0.05
##
## Summary of Coefficient Estimates
## --------------------------------
## Coefficient Min. 1st Qu. Median 3rd Qu. Max.
## Intercept -0.356 0.254 0.328 0.391 0.519
## Induscale -0.396 -0.267 -0.218 -0.178 0.028
## IT -0.004 -0.003 -0.002 -0.002 0.004
## Income 0.000 0.000 0.000 0.000 0.000
## Sexrat -0.045 0.036 0.052 0.093 0.271
## Houseown 0.001 0.002 0.003 0.003 0.004
## Indemp -0.000 -0.000 -0.000 -0.000 0.000
## Indcom 0.000 0.000 0.000 0.000 0.000
## Hiedu 0.000 0.001 0.002 0.002 0.004
##
## Diagnostic Information
## ----------------------
## RSS: 0.123
## ENP: 112.902
## EDF: 220.098
## R2: 0.836
## R2adj: 0.832
## AIC: -1607.940
## AICc: -1444.543
## RMSE: 0.019
tibble::tibble(
Model = c("GWR","GeoCGWR"),
R2 = c(g1$diagnostic$RSquare,g2$diagnostic$R2),
AdjR2 = c(g1$diagnostic$RSquareAdjust,g2$diagnostic$R2_Adj),
AICc = c(g1$diagnostic$AICc,g2$diagnostic$AICc),
RMSE = c(sqrt(g1$diagnostic$RSS / nrow(econineq)),g2$diagnostic$RMSE)
)|>
pander::pander()
Model | R2 | AdjR2 | AICc | RMSE |
---|---|---|---|---|
GWR | 0.8025 | 0.7409 | -1460 | 0.0211 |
GeoCGWR | 0.836 | 0.8319 | -1445 | 0.01923 |