Setup

Load packages & functions

source('_data/setup.R')

suppressPackageStartupMessages({
  require(randomForest)
  require(pdp)
  require(BAMMtools)
  require(pROC)
  require(PRROC)
  require(ggbeeswarm)
  require(iml)
  require(ggrepel)
})

label_wrap <- function(variable, value) {
  lapply(strwrap(as.character(value), width=25, simplify=FALSE), paste, collapse="\n")
}

theme_set(theme_classic() + theme(text = element_text(family = 'Segoe UI', size = 9)))

transfm <- function(df, xname) df %>% 
  mutate(x = if(grepl('log', xname)) 10^get(xname)-1 else get(xname))

# colors.hev <- scico(5, palette = 'lajolla')[2:4]
colors.hev <- scico(8, palette = 'lapaz')[c(7,5,3)]
# colors.hev <- c(
#   rgb(242,168,168, maxColorValue = 255), 
#   rgb(164,24,24, maxColorValue = 255),
#   rgb(75,13,13, maxColorValue = 255))

ctycol <- scico(7,palette='roma')[2]
saccol <- roma.colors[5]

Load Sherlock results

nsamp <- 1000

Statewide

## 1981 - all variables

load('_results/county_rfmodel_1981.Rdata')
f.cty.1981 <- f.rf
df.pca.cty.1981 <- df.pca
df.train.cty.1981 <- df.train
df.test.cty.1981 <- df.test
obs.cty.1981 <- obs.id
samp.cty.1981 <- sample(1:nrow(df.pca.cty.1981), nsamp)

load(paste0('_results/county_iml_1981.Rdata'))
ale.cty.1981 <- iml.ale
load(paste0('_results/county_shap_1981.Rdata'))
shap.cty.1981 <- shap
## 2009 - all variables

load('_results/county_rfmodel_2009.Rdata')
f.cty.2009 <- f.rf
df.pca.cty.2009 <- df.pca
df.train.cty.2009 <- df.train
df.test.cty.2009 <- df.test
obs.cty.2009 <- obs.id
samp.cty.2009 <- sample(1:nrow(df.pca.cty.2009), nsamp)

load(paste0('_results/county_iml_2009.Rdata'))
ale.cty.2009 <- iml.ale
load(paste0('_results/county_shap_2009.Rdata'))
shap.cty.2009 <- shap

Sacramento

## 1981 - all variables

load('_results/sac_rfmodel_1981.Rdata')
f.sac.1981 <- f.rf
df.pca.sac.1981 <- df.pca
df.train.sac.1981 <- df.train
df.test.sac.1981 <- df.test
obs.sac.1981 <- obs.id
samp.sac.1981 <- sample(1:nrow(df.pca.sac.1981), nsamp)

load(paste0('_results/sac_iml_1981.Rdata'))
ale.sac.1981 <- iml.ale
load(paste0('_results/sac_shap_1981.Rdata'))
shap.sac.1981 <- shap
## 2009 - all variables

load('_results/sac_rfmodel_2009.Rdata')
f.sac.2009 <- f.rf
df.pca.sac.2009 <- df.pca
df.train.sac.2009 <- df.train
df.test.sac.2009 <- df.test
obs.sac.2009 <- obs.id
samp.sac.2009 <- sample(1:nrow(df.pca.sac.2009), nsamp)

load(paste0('_results/sac_iml_2009.Rdata'))
ale.sac.2009 <- iml.ale
load(paste0('_results/sac_shap_2009.Rdata'))
shap.sac.2009 <- shap

Remove extraneous variables

rm(f.rf, df.pca, df.train, df.test, obs.id, iml.ale, shap)

TABLE 2: Dataset statistics by spatial and temporal scale

rbind(
  df.train.cty.1981 %>% count(y) %>% 
    summarize(total = sum(n), yes = n[y=='yes']) %>% 
    mutate(spat = 'Statewide', temp = '1981-2021'),
  df.test.cty.1981 %>% count(y) %>% 
    summarize(total = sum(n), yes = n[y=='yes']) %>% 
    mutate(spat = 'Statewide', temp = '1981-2021'),
  df.train.cty.2009 %>% count(y) %>% 
    summarize(total = sum(n), yes = n[y=='yes']) %>% 
    mutate(spat = 'Statewide', temp = '2009-2021'),
  df.test.cty.2009 %>% count(y) %>% 
    summarize(total = sum(n), yes = n[y=='yes']) %>% 
    mutate(spat = 'Statewide', temp = '2009-2021'),
  df.train.sac.1981 %>% count(y) %>% 
    summarize(total = sum(n), yes = n[y=='yes']) %>% 
    mutate(spat = 'Sacramento', temp = '1981-2021'),
  df.test.sac.1981 %>% count(y) %>% 
    summarize(total = sum(n), yes = n[y=='yes']) %>% 
    mutate(spat = 'Sacramento', temp = '1981-2021'),
  df.train.sac.2009 %>% count(y) %>% 
    summarize(total = sum(n), yes = n[y=='yes']) %>% 
    mutate(spat = 'Sacramento', temp = '2009-2021'),
  df.test.sac.2009 %>% count(y) %>% 
    summarize(total = sum(n), yes = n[y=='yes']) %>% 
    mutate(spat = 'Sacramento', temp = '2009-2021')
) %>% 
  group_by(spat = factor(spat, levels = c('Statewide','Sacramento')),temp) %>% 
  summarize(across(everything(), sum)) %>% 
  mutate(pct = yes/total) %>% 
  ungroup %>% 
  gt %>% 
  fmt_markdown(c(spat, temp)) %>% 
  fmt_number(c(total, yes), decimals = 0) %>% 
  fmt_percent(pct) %>% 
  cols_label(
    spat = html('Spatial<br>Scale'),
    temp = html('Temporal<br>Scale'),
    total = html('Number of<br>Records'),
    yes = html('Damaging<br>Records'), 
    pct = html('Class<br>Balance')) %>% 
  tab_header('Dataset Statistics by Spatial and Temporal Scale') %>% 
  tab_options(
    heading.background.color = '#d9d9d9', 
    column_labels.background.color = '#f2f2f2')
Dataset Statistics by Spatial and Temporal Scale
Spatial
Scale
Temporal
Scale
Number of
Records
Damaging
Records
Class
Balance
Statewide

1981-2021

17,265 863 5.00%
Statewide

2009-2021

5,659 234 4.14%
Sacramento

1981-2021

145,977 874 0.60%
Sacramento

2009-2021

47,776 114 0.24%

Calculate performance metrics

Statewide

data.cty.1981 <- 
  predict(f.cty.1981$finalModel, df.test.cty.1981, type = 'prob') %>% 
  as.data.frame %>% 
  transmute(pred = yes, obs = df.test.cty.1981$y) %>% 
  arrange(pred)
metrics.cty.1981 <- 
  foreach (i = (0:100)/100, .combine = 'rbind') %do% {
    caret::confusionMatrix(
      data = factor(ifelse(data.cty.1981$pred > i, 'yes', 'no'), levels = c('no','yes')), 
      reference = data.cty.1981$obs,
      positive = 'yes')$byClass %>% c(i = i)
  } %>% data.frame

pr.auc.cty.1981 <- pr.curve(
  scores.class0 = data.cty.1981$pred, 
  weights.class0 = data.cty.1981$obs=='yes')$auc.integral
roc.auc.cty.1981 <- roc(response = data.cty.1981$obs=='yes', predictor = data.cty.1981$pred)$auc %>% toNumber
stats.cty.1981 <- confusionMatrix(predict(f.cty.1981$finalModel, df.test.cty.1981), df.test.cty.1981$y, positive = 'yes')
stats.cty.1981 <- c(stats.cty.1981$overall, stats.cty.1981$byClass)
data.cty.2009 <- 
  predict(f.cty.2009$finalModel, df.test.cty.2009, type = 'prob') %>% 
  as.data.frame %>% 
  transmute(pred = yes, obs = df.test.cty.2009$y) %>% 
  arrange(pred)
metrics.cty.2009 <- 
  foreach (i = (0:100)/100, .combine = 'rbind') %do% {
    caret::confusionMatrix(
      data = factor(ifelse(data.cty.2009$pred > i, 'yes', 'no'), levels = c('no','yes')), 
      reference = data.cty.2009$obs,
      positive = 'yes')$byClass %>% c(i = i)
  } %>% data.frame

pr.auc.cty.2009 <- pr.curve(
  scores.class0 = data.cty.2009$pred, 
  weights.class0 = data.cty.2009$obs=='yes')$auc.integral
roc.auc.cty.2009 <- roc(response = data.cty.2009$obs=='yes', predictor = data.cty.2009$pred)$auc %>% toNumber
stats.cty.2009 <- confusionMatrix(predict(f.cty.2009$finalModel, df.test.cty.2009), df.test.cty.2009$y, positive = 'yes')
stats.cty.2009 <- c(stats.cty.2009$overall, stats.cty.2009$byClass)

Sacramento

data.sac.1981 <- 
  predict(f.sac.1981$finalModel, df.test.sac.1981, type = 'prob') %>% 
  as.data.frame %>% 
  transmute(pred = yes, obs = df.test.sac.1981$y) %>% 
  arrange(pred)
metrics.sac.1981 <- 
  foreach (i = (0:100)/100, .combine = 'rbind') %do% {
    caret::confusionMatrix(
      data = factor(ifelse(data.sac.1981$pred > i, 'yes', 'no'), levels = c('no','yes')), 
      reference = data.sac.1981$obs,
      positive = 'yes')$byClass %>% c(i = i)
  } %>% data.frame

pr.auc.sac.1981 <- pr.curve(
  scores.class0 = data.sac.1981$pred, 
  weights.class0 = data.sac.1981$obs=='yes')$auc.integral
roc.auc.sac.1981 <- roc(response = data.sac.1981$obs=='yes', predictor = data.sac.1981$pred)$auc %>% toNumber
stats.sac.1981 <- confusionMatrix(predict(f.sac.1981$finalModel, df.test.sac.1981), df.test.sac.1981$y, positive = 'yes')
stats.sac.1981 <- c(stats.sac.1981$overall, stats.sac.1981$byClass)
data.sac.2009 <- 
  predict(f.sac.2009$finalModel, df.test.sac.2009, type = 'prob') %>% 
  as.data.frame %>% 
  transmute(pred = yes, obs = df.test.sac.2009$y) %>% 
  arrange(pred)
metrics.sac.2009 <- 
  foreach (i = (0:100)/100, .combine = 'rbind') %do% {
    caret::confusionMatrix(
      data = factor(ifelse(data.sac.2009$pred > i, 'yes', 'no'), levels = c('no','yes')), 
      reference = data.sac.2009$obs,
      positive = 'yes')$byClass %>% c(i = i)
  } %>% data.frame

pr.auc.sac.2009 <- pr.curve(
  scores.class0 = data.sac.2009$pred, 
  weights.class0 = data.sac.2009$obs=='yes')$auc.integral
roc.auc.sac.2009 <- roc(response = data.sac.2009$obs=='yes', predictor = data.sac.2009$pred)$auc %>% toNumber
stats.sac.2009 <- confusionMatrix(predict(f.sac.2009$finalModel, df.test.sac.2009), df.test.sac.2009$y, positive = 'yes')
stats.sac.2009 <- c(stats.sac.2009$overall, stats.sac.2009$byClass)

Report model performance

stats.all <- 
  rbind(
    data.frame(t(stats.cty.1981)) %>% cbind(spat = 'Statewide', temp = '1981-2021'),
    data.frame(t(stats.cty.2009)) %>% cbind(spat = 'Statewide', temp = '2009-2021'),
    data.frame(t(stats.sac.1981)) %>% cbind(spat = 'Sacramento', temp = '1981-2021'),
    data.frame(t(stats.sac.2009)) %>% cbind(spat = 'Sacramento', temp = '2009-2021')) %>%
  select(Accuracy, Sensitivity, Specificity, Precision, Recall, Balanced.Accuracy, spat, temp) %>% 
  mutate(
    PR.AUC = c(pr.auc.cty.1981, pr.auc.cty.2009, pr.auc.sac.1981, pr.auc.sac.2009),
    ROC.AUC = c(roc.auc.cty.1981, roc.auc.cty.2009, roc.auc.sac.1981, roc.auc.sac.2009),
    PR.null = c(
      sum(df.test.cty.1981$y=='yes')/nrow(df.test.cty.1981),
      sum(df.test.cty.2009$y=='yes')/nrow(df.test.cty.2009),
      sum(df.test.sac.1981$y=='yes')/nrow(df.test.sac.1981),
      sum(df.test.sac.2009$y=='yes')/nrow(df.test.sac.2009)),
    ROC.null = rep(0.5,4),
    Balanced.null = rep(0.5,4))

metrics.all <- 
  rbind(
    metrics.cty.1981 %>% mutate(spat = 'Statewide', temp = '1981-2021'),
    metrics.cty.2009 %>% mutate(spat = 'Statewide', temp = '2009-2021'),
    metrics.sac.1981 %>% mutate(spat = 'Sacramento', temp = '1981-2021'),
    metrics.sac.2009 %>% mutate(spat = 'Sacramento', temp = '2009-2021')) %>% 
  mutate(xlab = paste(spat, temp, sep = '\n'))

FIG 1: Model performance metrics for the four RF models

g2 <- ggplot(stats.all) + 
  geom_hline(
    aes(yintercept = PR.null, color = spat, alpha = temp),
    linetype = 'dashed', show.legend = FALSE) + 
  geom_line(
    data = metrics.all,
    aes(x = Recall, y = Precision, group = xlab, color = spat, alpha = temp),
    size = 0.75) + 
  geom_point(
    aes(x = Recall, y = Precision, color = spat, alpha = temp),
    size = 2, show.legend = FALSE) + 
  scale_x_continuous(limits = c(0,1), expand = expansion(mult = c(0,0.01))) + 
  scale_y_continuous(limits = c(0,1), expand = expansion(mult = c(0,0.01))) + 
  scale_color_manual('Spatial\nResolution', values = c(saccol, ctycol)) +
  scale_alpha_manual('Temporal\nResolution', values = c(1,0.4)) + 
  guides(
    color = guide_legend(override.aes = list(size = 1.25)), 
    alpha = guide_legend(override.aes = list(size = 1.25, alpha = c(1,0.25)))) + 
  coord_fixed(clip = 'off') + 
  theme(
    text = element_text(family = 'Segoe UI', size = 9),
    panel.grid.major = element_line(size = 0.25), 
    legend.position = 'bottom')

g1 <- ggplot(stats.all) + 
  geom_line(
    data = metrics.all,
    aes(x = Specificity, y = Sensitivity, group = xlab, color = spat, alpha = temp),
    size = 0.75) + 
  geom_point(
    aes(x = Specificity, y = Sensitivity, color = spat, alpha = temp),
    size = 2, show.legend = FALSE) + 
  scale_x_reverse(limits = c(1,0), expand = expansion(mult = c(0,0.01))) + 
  scale_y_continuous(limits = c(0,1), expand = expansion(mult = c(0,0.01))) + 
  geom_abline(slope = 1, intercept = 1, linetype = 'dashed') + 
  scale_color_manual('Spatial\nResolution', values = c(saccol, ctycol)) +
  scale_alpha_manual('Temporal\nResolution', values = c(1,0.4)) + 
  guides(
    color = guide_legend(override.aes = list(size = 1.25)), 
    alpha = guide_legend(override.aes = list(size = 1.25, alpha = c(1,0.25)))) + 
  coord_fixed(clip = 'off') + 
  theme(
    text = element_text(family = 'Segoe UI', size = 9),
    panel.grid.major = element_line(size = 0.25), 
    legend.position = 'bottom')

g1 + g2 + guide_area() + 
  plot_layout(design = 'ab\ncc', heights = c(100,1), guides = 'collect') + 
  plot_annotation(tag_levels = 'a', tag_prefix = '(', tag_suffix = ')') & 
  theme(
    legend.background = element_blank(),
    plot.tag = element_text(face = 'bold'), legend.box = 'horizontal')
ggsave('_figures/fig1_performance.png', width = 6, height = 3)

TABLE 3: Random forest model performance metrics

stats.all %>% 
  select(spat, temp, ROC.AUC, ROC.null, PR.AUC, PR.null, Balanced.Accuracy, Balanced.null) %>% 
  gt %>% 
  fmt_markdown(c(spat,temp)) %>% 
  fmt_number(c(starts_with('PR'), starts_with('ROC'), starts_with('Balanced')), decimals = 3) %>% 
  tab_spanner(label = 'ROC-AUC', columns = c(ROC.AUC, ROC.null)) %>% 
  tab_spanner(label = 'PR-AUC', columns = c(PR.AUC, PR.null)) %>% 
  tab_spanner(label = html('Balanced<br>Accuracy'), columns = c(Balanced.Accuracy, Balanced.null)) %>% 
  cols_label(
    spat = html('Spatial<br>Scale'),
    temp = html('Temporal<br>Scale'),
    ROC.AUC = 'RF',
    ROC.null = 'Null',
    PR.AUC = 'RF',
    PR.null = 'Null',
    Balanced.Accuracy = 'RF',
    Balanced.null = 'Null'
  ) %>% 
  tab_header('Random Forest Model Performance Metrics') %>% 
  tab_options(
    heading.background.color = '#d9d9d9', 
    column_labels.background.color = '#f2f2f2')
Random Forest Model Performance Metrics
Spatial
Scale
Temporal
Scale
ROC-AUC PR-AUC Balanced
Accuracy
RF Null RF Null RF Null

Statewide

1981-2021

0.914 0.500 0.435 0.051 0.707 0.500

Statewide

2009-2021

0.914 0.500 0.353 0.041 0.657 0.500

Sacramento

1981-2021

0.959 0.500 0.311 0.007 0.761 0.500

Sacramento

2009-2021

0.898 0.500 0.046 0.002 0.673 0.500

Feature importance results

plotimp.cty.1981 <- shap.cty.1981 %>% 
  as.data.frame %>% select(-y) %>% 
  summarize(across(everything(), ~mean(abs(.)))) %>% 
  pivot_longer(everything(), names_to = 'varnames', values_to = 'shap') %>% 
  left_join(dimension, by = c('varnames' = 'var')) %>% 
  arrange(shap) %>% 
  filter(shap >= shap[varnames=='noise']) %>% 
  mutate(
    rank = nrow(.):1,
    varnames = factor(varnames, levels = varnames),
    fullname = factor(fullname, levels = fullname),
    shap = c(0, shap[-1]),
    dim = factor(dim, levels = c('H','E','V')))
plotimp.cty.2009 <- shap.cty.2009 %>% 
  as.data.frame %>% select(-y) %>% 
  summarize(across(everything(), ~mean(abs(.)))) %>% 
  pivot_longer(everything(), names_to = 'varnames', values_to = 'shap') %>% 
  left_join(dimension, by = c('varnames' = 'var')) %>% 
  arrange(shap) %>% 
  filter(shap >= shap[varnames=='noise']) %>% 
  mutate(
    rank = nrow(.):1,
    varnames = factor(varnames, levels = varnames),
    fullname = factor(fullname, levels = fullname),
    shap = c(0, shap[-1]),
    dim = factor(dim, levels = c('H','E','V')))

plotimp.sac.1981 <- shap.sac.1981 %>% 
  as.data.frame %>% select(-y) %>% 
  summarize(across(everything(), ~mean(abs(.)))) %>% 
  pivot_longer(everything(), names_to = 'varnames', values_to = 'shap') %>% 
  left_join(dimension, by = c('varnames' = 'var')) %>% 
  arrange(shap) %>% 
  filter(shap >= shap[varnames=='noise']) %>% 
  mutate(
    rank = nrow(.):1,
    varnames = factor(varnames, levels = varnames),
    fullname = factor(fullname, levels = fullname),
    shap = c(0, shap[-1]),
    dim = factor(dim, levels = c('H','E','V')))
plotimp.sac.2009 <- shap.sac.2009 %>% 
  as.data.frame %>% select(-y) %>% 
  summarize(across(everything(), ~mean(abs(.)))) %>% 
  pivot_longer(everything(), names_to = 'varnames', values_to = 'shap') %>% 
  left_join(dimension, by = c('varnames' = 'var')) %>% 
  arrange(shap) %>% 
  filter(shap >= shap[varnames=='noise']) %>% 
  mutate(
    rank = nrow(.):1,
    varnames = factor(varnames, levels = varnames),
    fullname = factor(fullname, levels = fullname),
    shap = c(0, shap[-1]),
    dim = factor(dim, levels = c('H','E','V')))

plotimp.all <- 
  rbind(
    plotimp.cty.1981 %>% mutate(spat = 'Statewide', temp = '1981-2021'),
    plotimp.cty.2009 %>% mutate(spat = 'Statewide', temp = '2009-2021'),
    plotimp.sac.1981 %>% mutate(spat = 'Sacramento', temp = '1981-2021'),
    plotimp.sac.2009 %>% mutate(spat = 'Sacramento', temp = '2009-2021')) %>% 
  filter(varnames != 'noise')

FIG 2: SHAP relative global feature importance by risk dimension and concept

df <- plotimp.all %>% 
  group_by(spat,temp,dim,concept) %>% 
  summarize(imptce = Sum(shap)) %>% 
  group_by(spat,temp) %>% 
  mutate(imptce = imptce/sum(imptce)) %>% 
  ungroup
df <- df %>% 
  group_by(spat,temp,dim) %>% 
  summarize(imptce.hev = Sum(imptce)) %>% 
  mutate(dim = factor(dim, levels = c('H','E','V'))) %>% 
  arrange(rev(dim)) %>% 
  mutate(
    start = cumsum(imptce.hev), 
    end = c(0, start[-length(dim)]),
    lab = cbind(start,end) %>% apply(1,mean)) %>% 
  ungroup %>% 
  select(-start, -end) %>% 
  left_join(
    df %>% select(spat,temp,dim,concept,imptce), 
    by = c('spat','temp','dim')) %>%
  group_by(spat,temp,dim) %>% 
  mutate(
    id = 1:length(dim),
    lab = case_when(id==1 ~ lab)) %>% 
  select(-id) %>%
  ungroup %>% 
  group_by(dim) %>% arrange(factor(concept)) %>% 
  mutate(
    conceptid = paste(dim, concept, sep = '-') %>% 
      factor(levels = rev(c(
        'H-ARchar','H-antecedent','H-modes-lsm','E-people','E-housing',
        'E-indicators','V-social','V-infra','V-memory')))) %>% 
  arrange(conceptid) %>% 
  mutate(xlab = paste(spat, temp, sep = '\n')) %>% 
  mutate(xlab = factor(xlab, levels = rev(levels(factor(xlab))))) %>% 
  ungroup %>% 
  mutate(
    xlab = paste(spat,temp,sep = '\n') %>% factor(
      levels = c('Statewide\n1981-2021', 'Statewide\n2009-2021', 'Sacramento\n1981-2021', 'Sacramento\n2009-2021')))
ggplot(df) + 
  geom_col(
    aes(x = as.numeric(xlab), y = imptce, group = dim),
    width = 0.75, fill = 'white', color = NA) + 
  geom_col(
    aes(x = as.numeric(xlab), y = imptce, group = dim, fill = dim, alpha = conceptid),
    width = 0.75) + 
  geom_col(
    data = df %>% 
      group_by(spat,temp,dim) %>% 
      summarize(
        imptce.hev = sum(imptce), dim = dim[1], xlab = xlab[1]) %>% 
      ungroup,
    aes(x = as.numeric(xlab), y = imptce.hev, group = dim), 
    width = 0.75, size = 0.25, fill = NA, color = 'grey10') + 
  geom_text(
    aes(x = as.numeric(xlab), y = lab, label = percent(imptce.hev, accuracy=1)),
    family = 'Segoe UI', fontface = 'bold', size = 9/.pt) + 
  geom_text_repel(
    data = df %>% 
      filter(spat == 'Statewide' & temp == '1981-2021') %>% 
      mutate(
        start = cumsum(imptce),
        end = c(0, start[-length(concept)]),
        lab = cbind(start,end) %>% apply(1,mean),
        concept_name = case_match(
          concept, 
          'ARchar' ~ 'AR Characteristics',
          'antecedent' ~ 'Antecedent\nConditions',
          'modes-lsm' ~ 'Climate Modes +\nLand Surface',
          'people' ~ 'Population',
          'housing' ~ 'Housing',
          'indicators' ~ 'Insurance Takeup', 
          'social' ~ 'Social Vulnerability',
          'infra' ~ 'Infrastructural\nVulnerability',
          'memory' ~ 'Flood Experience')),
    aes(x = as.numeric(xlab)-0.75/2, y = lab, label = concept_name),
    direction = 'y', min.segment.length = 0,
    family = 'Segoe UI', size = 8/.pt, hjust = 1, nudge_x = -0.225, lineheight = 0.8) + 
  scale_fill_manual(
    'Dimension',
    breaks = c('H','E','V'),
    labels = c('Hazard','Exposure','Vulnerability'),
    values = colors.hev) +
  scale_alpha_manual(
    'Dimension',
    values = c(1,0.8,0.6,1,0.75,0.5,1,0.7,0.4),
    guide = guide_none()) +
  scale_x_continuous(
    breaks = 1:4, labels = levels(df$xlab), 
    expand = expansion(add = c(1.25,0.25))) + 
  scale_y_origin('SHAP Feature Importance') + 
  theme(
    legend.position = 'bottom',
    panel.grid.major.y = element_line(size = 0.25),
    axis.title.x = element_blank(),
    axis.text.x = element_text(size = 9, color = 'black'),
    legend.box.margin = ggplot2::margin(-5,0,0,0))
ggsave('_figures/fig2_imptce.png', width = 6, height = 4)

FIG 3: SHAP feature importance rank

df <- plotimp.all %>% 
  mutate(shap = setNA(shap,0)) %>%
  pivot_wider(names_from = c(spat,temp), values_from = c(shap,rank)) %>% 
  mutate(
    across(starts_with('shap'), ~setNA(.,0)),
    across(starts_with('rank'), ~setNA(.,30))) %>%
  mutate(totalrank = select(., starts_with('rank')) %>% apply(1,sum)) %>% 
  pivot_longer(
    cols = c(starts_with('rank'), starts_with('shap')), 
    names_to = c('shaprank', 'spat', 'temp'), names_sep = '_') %>% 
  pivot_wider(names_from = shaprank, values_from = value) %>% 
  arrange(desc(totalrank)) %>% 
  mutate(
    fullname = factor(fullname, levels = unique(fullname)),
    xlab = paste(spat,temp,sep = '\n') %>% factor(
      levels = c('Statewide\n1981-2021', 'Statewide\n2009-2021', 'Sacramento\n1981-2021', 'Sacramento\n2009-2021')))

ggplot(df) + 
  geom_tile(
    aes(x = as.numeric(xlab), y = fullname, fill = shap), 
    width = 0.9) + 
  scale_fill_gradient(
    'SHAP Feature     \nContribution     ', 
    low = 'white', high = '#50245b', label = percent) + 
  geom_text(
    aes(x = 0.85, y = fullname, label = dim, color = dim),
    family = 'Segoe UI', size = 9/.pt, fontface = 'bold', nudge_x = -0.575,
    show.legend = FALSE) + 
  scale_color_manual(values = c('#dda077', colors.hev[-1])) + 
  ggnewscale::new_scale_color() + 
  geom_text(
    data = df %>% filter(rank <= 10),
    aes(x = as.numeric(xlab), y = fullname, label = rank, color = rank<=5),
    family = 'Segoe UI', size = 8/.pt,
    show.legend = FALSE) + 
  scale_color_manual(values = c('black','white')) + 
  scale_x_continuous(
    breaks = 1:4, labels = levels(df$xlab),
    position = 'top', expand = expansion(add = c(0.15,0.25))) + 
  scale_y_discrete(expand = expansion(0.035)) + 
  theme(
    axis.title = element_blank(), 
    axis.text.x = element_text(size = 9, color = 'black'),
    axis.text.y = element_text(size = 8, color = 'black'),
    axis.line = element_blank(),
    panel.background = element_rect(fill = 'white', color = 'grey70'),
    legend.position = 'bottom',
    legend.box.margin = ggplot2::margin(-5,0,0,0),
    legend.key.height = unit(10, 'pt'),
    legend.key.width = unit(25, 'pt'))
ggsave('_figures/fig3_rank_new.png', width = 6, height = 5)

Feature impact results

FIG 4: Top three hazard, exposure, and vulnerability features in the 1981 statewide model

## set plot parameters
ptsize <- 0.5
linesize <- 0.75
ptcol <- 'grey50'

rangehaz <- c(-0.3, 0.25)
rangeexp <- c(-0.15, 0.1)
rangevuln <- c(-0.15, 0.1)

gbase <- ggplot() + 
    geom_hline(yintercept = 0, color = 'grey50', size = 0.25) + 
  scale_y_continuous(
    'SHAP Feature Contribution',
    labels = percent,
    sec.axis = sec_axis(~., name = 'Accumulated Local Effects', labels = percent)) +
  theme(
    text = element_text(size = 9),
    panel.grid.major.y = element_line(size = 0.25),
    axis.line.y.right = element_line(),
    axis.ticks.y.right = element_line(),
    axis.title.y.left = element_blank(),
    axis.title.y.right = element_blank(),
    axis.text.y.left = element_blank(),
    axis.text.y.right = element_blank())

Hazard

plotimp.cty.1981 %>% 
  filter(dim == 'H') %>% arrange(desc(shap)) %>% 
  head(3) %>% pull(varnames) %>% paste
## [1] "logprecip_total" "maxivt"          "duration"
xname <- 'logprecip_total'
g1h <- gbase + 
  geom_point(
    data = transfm(df.pca.cty.1981, xname) %>% slice(samp.cty.1981) %>% 
      mutate(y = shap.cty.1981 %>% data.frame %>% slice(samp.cty.1981) %>% pull(xname)),
    aes(x = x, y = y), size = ptsize, color = ptcol, alpha = 0.25) + 
  geom_line(
    data = ale.cty.1981 %>% 
      filter(.feature == xname) %>% 
      mutate(.borders = toNumber(.borders)) %>% 
      filter(
        .borders > quantile(df.pca.cty.1981[,xname],0.025) &
          .borders < quantile(df.pca.cty.1981[,xname],0.975)) %>% 
      mutate(x = ifelse(grepl('log',.feature), 10^.borders-1, .borders)),
    aes(x = x, y = .value), size = linesize) +
  labs(
    title = 'Hazard',
    x = dimension$fullname[dimension$var==xname] %>% str_wrap(width = 20)) +
  scale_x_continuous(expand = expansion(mult = c(0.02,0.05))) + 
  coord_cartesian(xlim = c(0,250), ylim = rangehaz) +
  theme(
    plot.title = element_text(face = 'bold', color = '#dda077'),
    axis.text.y.left = element_text())

xname <- 'maxivt'
g2h <- gbase + 
  geom_point(
    data = transfm(df.pca.cty.1981, xname) %>% slice(samp.cty.1981) %>% 
      mutate(y = shap.cty.1981 %>% data.frame %>% slice(samp.cty.1981) %>% pull(xname)),
    aes(x = x, y = y), size = ptsize, color = ptcol, alpha = 0.25) + 
  geom_line(
    data = ale.cty.1981 %>% 
      filter(.feature == xname) %>% 
      mutate(.borders = toNumber(.borders)) %>% 
      filter(
        .borders > quantile(df.pca.cty.1981[,xname],0.025) &
          .borders < quantile(df.pca.cty.1981[,xname],0.975)) %>% 
      mutate(x = ifelse(grepl('log',.feature), 10^.borders-1, .borders)),
    aes(x = x, y = .value), size = linesize) +
  coord_cartesian(xlim = c(250,NA), ylim = rangehaz) + 
  labs(
    x = dimension$fullname[dimension$var==xname] %>% str_wrap(width = 20)) +
  scale_x_continuous(breaks = 250*(0:10), expand = expansion(mult = c(0,0.05)))

xname <- 'duration'
g3h <- gbase + 
  geom_point(
    data = transfm(df.pca.cty.1981, xname) %>% slice(samp.cty.1981) %>% 
      mutate(y = shap.cty.1981 %>% data.frame %>% slice(samp.cty.1981) %>% pull(xname)),
    aes(x = x, y = y), size = ptsize, color = ptcol, alpha = 0.25) + 
  geom_line(
    data = ale.cty.1981 %>% 
      filter(.feature == xname) %>% 
      mutate(.borders = toNumber(.borders)) %>% 
      filter(
        .borders > quantile(df.pca.cty.1981[,xname],0.025) &
          .borders < quantile(df.pca.cty.1981[,xname],0.975)) %>% 
      mutate(x = ifelse(grepl('log',.feature), 10^.borders-1, .borders)),
    aes(x = x, y = .value), size = linesize) +
  labs(
    x = dimension$fullname[dimension$var==xname] %>% str_wrap(width = 20)) +
  scale_x_continuous(expand = expansion(mult = c(0.02,0.05))) + 
  coord_cartesian(xlim = c(0,150), ylim = rangehaz) +
  theme(
    axis.text.y.right = element_text())

Exposure

plotimp.cty.1981 %>% 
  filter(dim == 'E') %>% arrange(desc(shap)) %>% 
  head(3) %>% pull(varnames) %>% paste
## [1] "loghu"              "pop_pct_floodplain" "pct_sfh"
xname <- 'loghu'
g1e <- gbase + 
  geom_point(
    data = transfm(df.pca.cty.1981, xname) %>% slice(samp.cty.1981) %>% 
      mutate(y = shap.cty.1981 %>% data.frame %>% slice(samp.cty.1981) %>% pull(xname)),
    aes(x = x, y = y), size = ptsize, color = ptcol, alpha = 0.25) + 
  geom_line(
    data = ale.cty.1981 %>% 
      filter(.feature == xname) %>% 
      mutate(.borders = toNumber(.borders)) %>% 
      filter(
        .borders > quantile(df.pca.cty.1981[,xname],0.025) &
          .borders < quantile(df.pca.cty.1981[,xname],0.975)) %>% 
      mutate(x = ifelse(grepl('log',.feature), 10^.borders-1, .borders)),
    aes(x = x, y = .value), size = linesize) +
  labs(
    title = 'Exposure',
    x = dimension$fullname[dimension$var==xname] %>% str_wrap(width = 20)) +
  scale_x_log10(
    breaks = 10^(1:10), 
    labels = c('10','100','1K','10K','100K','1M','10M','100M','1B','10B')) + 
  annotation_logticks(sides = 'b', size = 0.25, color = 'grey25') + 
  coord_cartesian(ylim = rangeexp) + 
  theme(
    plot.title = element_text(face = 'bold', color = '#71938f'),
    axis.title.y.left = element_text(),
    axis.text.y.left = element_text())

xname <- 'pop_pct_floodplain'
g2e <- gbase + 
  geom_point(
    data = transfm(df.pca.cty.1981, xname) %>% slice(samp.cty.1981) %>% 
      mutate(y = shap.cty.1981 %>% data.frame %>% slice(samp.cty.1981) %>% pull(xname)),
    aes(x = x, y = y), size = ptsize, color = ptcol, alpha = 0.25) + 
  geom_line(
    data = ale.cty.1981 %>% 
      filter(.feature == xname) %>% 
      mutate(.borders = toNumber(.borders)) %>% 
      filter(
        .borders > quantile(df.pca.cty.1981[,xname],0.025) &
          .borders < quantile(df.pca.cty.1981[,xname],0.975)) %>% 
      mutate(x = ifelse(grepl('log',.feature), 10^.borders-1, .borders)),
    aes(x = x, y = .value), size = linesize) +
  labs(
    x = dimension$fullname[dimension$var==xname] %>% str_wrap(width = 20)) +
  scale_x_continuous(labels = percent, expand = expansion(mult = c(0.02,0.05))) + 
  coord_cartesian(xlim = c(0,0.4), ylim = rangeexp)

xname <- 'pct_sfh'
g3e <- gbase + 
  geom_point(
    data = transfm(df.pca.cty.1981, xname) %>% slice(samp.cty.1981) %>% 
      mutate(y = shap.cty.1981 %>% data.frame %>% slice(samp.cty.1981) %>% pull(xname)),
    aes(x = x, y = y), size = ptsize, color = ptcol, alpha = 0.25) + 
  geom_line(
    data = ale.cty.1981 %>% 
      filter(.feature == xname) %>% 
      mutate(.borders = toNumber(.borders)) %>% 
      filter(
        .borders > quantile(df.pca.cty.1981[,xname],0.025) &
          .borders < quantile(df.pca.cty.1981[,xname],0.975)) %>% 
      mutate(x = ifelse(grepl('log',.feature), 10^.borders-1, .borders)),
    aes(x = x, y = .value), size = linesize) +
  labs(
    x = dimension$fullname[dimension$var==xname] %>% str_wrap(width = 24)) +
  scale_x_continuous(labels = percent, expand = expansion(mult = 0.05)) + 
  coord_cartesian(xlim = c(0.4,NA), ylim = rangeexp) + 
  theme(
    axis.title.y.right = element_text(),
    axis.text.y.right = element_text())

Vulnerability

plotimp.cty.1981 %>% 
  filter(dim == 'V') %>% arrange(desc(shap)) %>% 
  head(3) %>% pull(varnames) %>% paste
## [1] "CRS"         "hhincome22"  "cal_polburd"
xname <- 'CRS'
g1v <- gbase + 
  geom_point(
    data = transfm(df.pca.cty.1981, xname) %>% slice(samp.cty.1981) %>% 
      mutate(y = shap.cty.1981 %>% data.frame %>% slice(samp.cty.1981) %>% pull(xname)),
    aes(x = jitter(x), y = y), size = ptsize, color = ptcol, alpha = 0.25) + 
  geom_line(
    data = ale.cty.1981 %>% 
      filter(.feature == xname) %>% 
      mutate(.borders = toNumber(.borders)) %>% 
      filter(.borders > 2) %>% 
      mutate(x = ifelse(grepl('log',.feature), 10^.borders-1, .borders)),
    aes(x = x, y = .value), size = linesize) +
  labs(
    title = 'Vulnerability',
    x = dimension$fullname[dimension$var==xname] %>% str_wrap(width = 20)) +
  coord_cartesian(ylim = rangevuln) +
  scale_x_continuous(breaks = 2:10) + 
  theme(
    plot.title = element_text(face = 'bold', color = colors.hev[3]),
    axis.text.y.left = element_text())

xname <- 'hhincome22'
g2v <- gbase + 
  geom_point(
    data = transfm(df.pca.cty.1981, xname) %>% slice(samp.cty.1981) %>% 
      mutate(y = shap.cty.1981 %>% data.frame %>% slice(samp.cty.1981) %>% pull(xname)),
    aes(x = x, y = y), size = ptsize, color = ptcol, alpha = 0.25) + 
  geom_line(
    data = ale.cty.1981 %>% 
      filter(.feature == xname) %>% 
      mutate(.borders = toNumber(.borders)) %>% 
      filter(
        .borders > quantile(df.pca.cty.1981[,xname],0.025) &
          .borders < quantile(df.pca.cty.1981[,xname],0.975)) %>% 
      mutate(x = ifelse(grepl('log',.feature), 10^.borders-1, .borders)),
    aes(x = x, y = .value), size = linesize) +
  labs(
    x = dimension$fullname[dimension$var==xname] %>% str_wrap(width = 20)) +
  scale_x_continuous(labels = comma_format(scale = 1e-3, prefix = '$', suffix = 'K')) + 
  coord_cartesian(ylim = rangevuln)

xname <- 'cal_polburd'
g3v <- gbase + 
  geom_point(
    data = transfm(df.pca.cty.1981, xname) %>% slice(samp.cty.1981) %>% 
      mutate(y = shap.cty.1981 %>% data.frame %>% slice(samp.cty.1981) %>% pull(xname)),
    aes(x = x, y = y), size = ptsize, color = ptcol, alpha = 0.25) + 
  geom_line(
    data = ale.cty.1981 %>% 
      filter(.feature == xname) %>% 
      mutate(.borders = toNumber(.borders)) %>% 
      filter(
        .borders > quantile(df.pca.cty.1981[,xname],0.025) &
          .borders < quantile(df.pca.cty.1981[,xname],0.975)) %>% 
      mutate(x = ifelse(grepl('log',.feature), 10^.borders-1, .borders)),
    aes(x = x, y = .value), size = linesize) +
  labs(
    x = dimension$fullname[dimension$var==xname] %>% str_wrap(width = 20)) +
  coord_cartesian(ylim = rangevuln) +
  theme(
    axis.text.y.right = element_text())
g1h + g2h + g3h +
  g1e + g2e + g3e + 
  g1v + g2v + g3v + 
  plot_layout(nrow = 3) +
  plot_annotation(tag_levels = 'a', tag_prefix = '(', tag_suffix = ')') &
  theme(
    legend.position = 'none',
    plot.tag = element_text(face = 'bold'),
    plot.tag.position = c(0.12,0.975)) #1.01 without the title
ggsave('_figures/fig4_impact.png', width = 6, height = 6)

Feature interaction results

Fit new ALE models by AR category

df.subset <- df.pca.cty.1981 %>% filter(cat==1)
predictor1 <- Predictor$new(
  f.cty.1981$finalModel,
  data = df.subset,
  y = df.subset$y=='yes',
  class = 'yes',
  type = 'prob')

df.subset <- df.pca.cty.1981 %>% filter(cat==2)
predictor2 <- Predictor$new(
  f.cty.1981$finalModel,
  data = df.subset,
  y = df.subset$y=='yes',
  class = 'yes',
  type = 'prob')

df.subset <- df.pca.cty.1981 %>% filter(cat==3)
predictor3 <- Predictor$new(
  f.cty.1981$finalModel,
  data = df.subset,
  y = df.subset$y=='yes',
  class = 'yes',
  type = 'prob')

df.subset <- df.pca.cty.1981 %>% filter(cat==4)
predictor4 <- Predictor$new(
  f.cty.1981$finalModel,
  data = df.subset,
  y = df.subset$y=='yes',
  class = 'yes',
  type = 'prob')

df.subset <- df.pca.cty.1981 %>% filter(cat==5)
predictor5 <- Predictor$new(
  f.cty.1981$finalModel,
  data = df.subset,
  y = df.subset$y=='yes',
  class = 'yes',
  type = 'prob')

bycat.cty.1981 <- 
  foreach (
    xname = plotimp.cty.1981 %>% filter(dim %in% c('E','V')) %>% pull(varnames) %>% paste,
    .combine = 'rbind') %do% {
      df <- rbind(
        FeatureEffect$new(predictor1, feature = xname, method = 'ale', grid.size = 50) %>%
          .$results %>% setNames(c('.type', '.value', '.borders')) %>%
          mutate(.borders = toNumber(.borders), .feature = xname, cat = 1),
        FeatureEffect$new(predictor2, feature = xname, method = 'ale', grid.size = 50) %>%
          .$results %>% setNames(c('.type', '.value', '.borders')) %>%
          mutate(.borders = toNumber(.borders), .feature = xname, cat = 2),
        FeatureEffect$new(predictor3, feature = xname, method = 'ale', grid.size = 50) %>%
          .$results %>% setNames(c('.type', '.value', '.borders')) %>%
          mutate(.borders = toNumber(.borders), .feature = xname, cat = 3),
        FeatureEffect$new(predictor4, feature = xname, method = 'ale', grid.size = 50) %>%
          .$results %>% setNames(c('.type', '.value', '.borders')) %>%
          mutate(.borders = toNumber(.borders), .feature = xname, cat = 4),
        FeatureEffect$new(predictor5, feature = xname, method = 'ale', grid.size = 50) %>%
          .$results %>% setNames(c('.type', '.value', '.borders')) %>%
          mutate(.borders = toNumber(.borders), .feature = xname, cat = 5))
      if (xname %in% c('CRS', 'disasters', 'coastal')) df else {
        df %>%
          filter(
            .borders > quantile(df.pca.cty.1981[,xname],0.025) &
              .borders < quantile(df.pca.cty.1981[,xname],0.975))
      }
    }

Choose which interaction plots to feature

bycat.cty.1981 %>% 
  filter(!(.feature %in% c(
    'disasters', 'coastal', 'pct_white_nonhisp', 'pct_working', 'pct_mobile', 'pct_ownocc',
    'med_struct_age', 'hhincome22', 'cdc_theme4', 'cal_popchar', 'pop_pct_floodplain'))) %>%
  left_join(dimension, by = c('.feature' = 'var')) %>% 
ggplot() + 
  geom_hline(yintercept = 0, color = 'grey50', size = 0.25) + 
  geom_line(aes(x = .borders, y = .value, group = cat, color = factor(cat)), size = 1) + 
  geom_rug(
    data = df.pca.cty.1981 %>% 
      select(names(.)[names(.) %in% (dimension %>% filter(dim %in% c('E','V')) %>% pull(var))]) %>% 
      slice(sample(1:nrow(.),1000)) %>% 
      pivot_longer(everything(), names_to = '.feature', values_to = 'x') %>% 
      left_join(dimension, by = c('.feature' = 'var')) %>% 
      filter(!(.feature %in% c(
        'disasters', 'coastal', 'pct_white_nonhisp', 'pct_working', 'pct_mobile', 'pct_ownocc',
        'med_struct_age', 'hhincome22', 'cdc_theme4', 'cal_popchar', 'pop_pct_floodplain'))),
    aes(x = x)) + 
  facet_wrap(~paste(.feature,dim), scales = 'free') + 
  scale_color_manual('AR Category', values = rev(roma.colors)) + 
  theme(
    legend.position = 'bottom',
    panel.grid.major.y = element_line(size = 0.25))

FIG 5: Interactions between AR category and exposure and vulnerability features in the 1981 statewide model

yrange <- c(-0.24,0.12)
gbase <- ggplot() + 
  geom_hline(yintercept = 0, color = 'grey50', size = 0.25) + 
  scale_y_continuous(labels = percent, expand = expansion(0.025,0.035), limits = yrange) +
  scale_color_manual('AR Category', values = rev(roma.colors)) + 
  labs(y = 'Accumulated Local Effects') + 
  theme(
    text = element_text(size = 9), 
    legend.position = 'bottom',
    legend.background = element_blank(),
    legend.box.background = element_blank(),
    legend.margin = ggplot2::margin(0,0,-5,0),
    panel.grid.major.y = element_line(size = 0.25),
    plot.tag = element_text(size = 10, face = 'bold'),
    plot.tag.position = c(0.025,0.975),
    plot.margin = ggplot2::margin(16,2,2,2))
  
xname <- 'loghu'
g1 <- gbase + 
  geom_smooth(
    data = bycat.cty.1981 %>% 
      filter(.feature == xname) %>% 
      left_join(dimension, by = c('.feature' = 'var')) %>% 
      select(.borders, .value, cat) %>% 
      setNames(c(xname,'y','cat')) %>% transfm(.,xname),
    aes(x = x, y = y, group = factor(cat), color = factor(cat)),
    size = 0.75, se = FALSE, span = 0.25) + 
  scale_x_log10(
    breaks = 10^(1:10), 
    labels = c('10','100','1K','10K','100K','1M','10M','100M','1B','10B')) + 
  labs(
    title = '   Exposure',
    x = dimension$fullname[dimension$var==xname] %>% str_wrap(width = 18)) + 
  geom_rug(
    data = transfm(df.pca.cty.1981, xname) %>% slice(samp.cty.1981) %>% select(x),
    aes(x = x), size = 0.25, alpha = 0.25) + 
  theme(
    plot.title = element_text(face = 'bold', color = '#71938f'),
    plot.tag.position = c(0.25,0.975))

xname <- 'pct_sfh'
g2 <- gbase + 
  geom_smooth(
    data = bycat.cty.1981 %>% 
      filter(.feature == xname) %>% 
      left_join(dimension, by = c('.feature' = 'var')) %>% 
      select(.borders, .value, cat) %>% 
      setNames(c(xname,'y','cat')) %>% transfm(.,xname),
    aes(x = x, y = y, group = factor(cat), color = factor(cat)),
    size = 0.75, se = FALSE, span = 0.25) + 
  scale_x_continuous(labels = percent, limits = c(0.4,NA)) +
  labs(x = dimension$fullname[dimension$var==xname] %>% str_wrap(width = 18)) + 
  geom_rug(
    data = transfm(df.pca.cty.1981, xname) %>% slice(samp.cty.1981) %>% select(x),
    aes(x = x), size = 0.25, alpha = 0.25) + 
  theme(axis.title.y = element_blank(), axis.text.y = element_blank())

xname <- 'cal_polburd'
g3 <- gbase + 
  geom_smooth(
    data = bycat.cty.1981 %>% 
      filter(.feature == xname) %>% 
      left_join(dimension, by = c('.feature' = 'var')) %>% 
      select(.borders, .value, cat) %>% 
      setNames(c(xname,'y','cat')) %>% transfm(.,xname),
    aes(x = x, y = y, group = factor(cat), color = factor(cat)),
    size = 0.75, se = FALSE, span = 0.25) + 
  scale_x_continuous() +
  labs(
    title = '   Vulnerability',
    x = dimension$fullname[dimension$var==xname] %>% str_wrap(width = 20)) + 
  geom_rug(
    data = transfm(df.pca.cty.1981, xname) %>% slice(samp.cty.1981) %>% select(x),
    aes(x = x), size = 0.25, alpha = 0.25) + 
  theme(
    plot.title = element_text(face = 'bold', color = colors.hev[3]),
    axis.title.y = element_blank(), axis.text.y = element_blank())

xname <- 'CRS'
g4 <- gbase + 
  geom_line(
    data = bycat.cty.1981 %>% 
      filter(.feature == xname) %>% 
      left_join(dimension, by = c('.feature' = 'var')) %>% 
      select(.borders, .value, cat) %>% 
      setNames(c(xname,'y','cat')) %>% transfm(.,xname),
    aes(x = x, y = y, group = factor(cat), color = factor(cat)),
    size = 0.75, show.legend = FALSE) + 
  scale_x_continuous(breaks = 2:10) + 
  labs(x = dimension$fullname[dimension$var==xname] %>% str_wrap(width = 18)) + 
  geom_rug(
    data = transfm(df.pca.cty.1981, xname) %>% slice(samp.cty.1981) %>% select(x),
    aes(x = jitter(x)), size = 0.25, alpha = 0.25) + 
  theme(axis.title.y = element_blank(), axis.text.y = element_blank())
g1 + g2 + g3 + g4 + guide_area() + 
  plot_layout(design = 'abcd\neeee', guides = 'collect', heights = c(10,1)) + 
  plot_annotation(tag_levels = 'a', tag_prefix = '(', tag_suffix = ')')
ggsave('_figures/fig5_interaction.png', width = 6.5, height = 2.5)