source('_data/setup.R')
suppressPackageStartupMessages({
require(randomForest)
require(pdp)
require(BAMMtools)
require(pROC)
require(PRROC)
require(ggbeeswarm)
require(iml)
require(ggrepel)
})
<- function(variable, value) {
label_wrap lapply(strwrap(as.character(value), width=25, simplify=FALSE), paste, collapse="\n")
}
theme_set(theme_classic() + theme(text = element_text(family = 'Segoe UI', size = 9)))
<- function(df, xname) df %>%
transfm mutate(x = if(grepl('log', xname)) 10^get(xname)-1 else get(xname))
# colors.hev <- scico(5, palette = 'lajolla')[2:4]
<- scico(8, palette = 'lapaz')[c(7,5,3)]
colors.hev # colors.hev <- c(
# rgb(242,168,168, maxColorValue = 255),
# rgb(164,24,24, maxColorValue = 255),
# rgb(75,13,13, maxColorValue = 255))
<- scico(7,palette='roma')[2]
ctycol <- roma.colors[5] saccol
<- 1000 nsamp
## 1981 - all variables
load('_results/county_rfmodel_1981.Rdata')
.1981 <- f.rf
f.cty.1981 <- df.pca
df.pca.cty.1981 <- df.train
df.train.cty.1981 <- df.test
df.test.cty.1981 <- obs.id
obs.cty.1981 <- sample(1:nrow(df.pca.cty.1981), nsamp)
samp.cty
load(paste0('_results/county_iml_1981.Rdata'))
.1981 <- iml.ale
ale.ctyload(paste0('_results/county_shap_1981.Rdata'))
.1981 <- shap shap.cty
## 2009 - all variables
load('_results/county_rfmodel_2009.Rdata')
.2009 <- f.rf
f.cty.2009 <- df.pca
df.pca.cty.2009 <- df.train
df.train.cty.2009 <- df.test
df.test.cty.2009 <- obs.id
obs.cty.2009 <- sample(1:nrow(df.pca.cty.2009), nsamp)
samp.cty
load(paste0('_results/county_iml_2009.Rdata'))
.2009 <- iml.ale
ale.ctyload(paste0('_results/county_shap_2009.Rdata'))
.2009 <- shap shap.cty
## 1981 - all variables
load('_results/sac_rfmodel_1981.Rdata')
.1981 <- f.rf
f.sac.1981 <- df.pca
df.pca.sac.1981 <- df.train
df.train.sac.1981 <- df.test
df.test.sac.1981 <- obs.id
obs.sac.1981 <- sample(1:nrow(df.pca.sac.1981), nsamp)
samp.sac
load(paste0('_results/sac_iml_1981.Rdata'))
.1981 <- iml.ale
ale.sacload(paste0('_results/sac_shap_1981.Rdata'))
.1981 <- shap shap.sac
## 2009 - all variables
load('_results/sac_rfmodel_2009.Rdata')
.2009 <- f.rf
f.sac.2009 <- df.pca
df.pca.sac.2009 <- df.train
df.train.sac.2009 <- df.test
df.test.sac.2009 <- obs.id
obs.sac.2009 <- sample(1:nrow(df.pca.sac.2009), nsamp)
samp.sac
load(paste0('_results/sac_iml_2009.Rdata'))
.2009 <- iml.ale
ale.sacload(paste0('_results/sac_shap_2009.Rdata'))
.2009 <- shap shap.sac
rm(f.rf, df.pca, df.train, df.test, obs.id, iml.ale, shap)
rbind(
.1981 %>% count(y) %>%
df.train.ctysummarize(total = sum(n), yes = n[y=='yes']) %>%
mutate(spat = 'Statewide', temp = '1981-2021'),
.1981 %>% count(y) %>%
df.test.ctysummarize(total = sum(n), yes = n[y=='yes']) %>%
mutate(spat = 'Statewide', temp = '1981-2021'),
.2009 %>% count(y) %>%
df.train.ctysummarize(total = sum(n), yes = n[y=='yes']) %>%
mutate(spat = 'Statewide', temp = '2009-2021'),
.2009 %>% count(y) %>%
df.test.ctysummarize(total = sum(n), yes = n[y=='yes']) %>%
mutate(spat = 'Statewide', temp = '2009-2021'),
.1981 %>% count(y) %>%
df.train.sacsummarize(total = sum(n), yes = n[y=='yes']) %>%
mutate(spat = 'Sacramento', temp = '1981-2021'),
.1981 %>% count(y) %>%
df.test.sacsummarize(total = sum(n), yes = n[y=='yes']) %>%
mutate(spat = 'Sacramento', temp = '1981-2021'),
.2009 %>% count(y) %>%
df.train.sacsummarize(total = sum(n), yes = n[y=='yes']) %>%
mutate(spat = 'Sacramento', temp = '2009-2021'),
.2009 %>% count(y) %>%
df.test.sacsummarize(total = sum(n), yes = n[y=='yes']) %>%
mutate(spat = 'Sacramento', temp = '2009-2021')
%>%
) group_by(spat = factor(spat, levels = c('Statewide','Sacramento')),temp) %>%
summarize(across(everything(), sum)) %>%
mutate(pct = yes/total) %>%
%>%
ungroup %>%
gt fmt_markdown(c(spat, temp)) %>%
fmt_number(c(total, yes), decimals = 0) %>%
fmt_percent(pct) %>%
cols_label(
spat = html('Spatial<br>Scale'),
temp = html('Temporal<br>Scale'),
total = html('Number of<br>Records'),
yes = html('Damaging<br>Records'),
pct = html('Class<br>Balance')) %>%
tab_header('Dataset Statistics by Spatial and Temporal Scale') %>%
tab_options(
heading.background.color = '#d9d9d9',
column_labels.background.color = '#f2f2f2')
Dataset Statistics by Spatial and Temporal Scale | ||||
---|---|---|---|---|
Spatial Scale |
Temporal Scale |
Number of Records |
Damaging Records |
Class Balance |
Statewide | 1981-2021 |
17,265 | 863 | 5.00% |
Statewide | 2009-2021 |
5,659 | 234 | 4.14% |
Sacramento | 1981-2021 |
145,977 | 874 | 0.60% |
Sacramento | 2009-2021 |
47,776 | 114 | 0.24% |
.1981 <-
data.ctypredict(f.cty.1981$finalModel, df.test.cty.1981, type = 'prob') %>%
%>%
as.data.frame transmute(pred = yes, obs = df.test.cty.1981$y) %>%
arrange(pred)
.1981 <-
metrics.ctyforeach (i = (0:100)/100, .combine = 'rbind') %do% {
::confusionMatrix(
caretdata = factor(ifelse(data.cty.1981$pred > i, 'yes', 'no'), levels = c('no','yes')),
reference = data.cty.1981$obs,
positive = 'yes')$byClass %>% c(i = i)
%>% data.frame
}
.1981 <- pr.curve(
pr.auc.ctyscores.class0 = data.cty.1981$pred,
weights.class0 = data.cty.1981$obs=='yes')$auc.integral
.1981 <- roc(response = data.cty.1981$obs=='yes', predictor = data.cty.1981$pred)$auc %>% toNumber
roc.auc.cty.1981 <- confusionMatrix(predict(f.cty.1981$finalModel, df.test.cty.1981), df.test.cty.1981$y, positive = 'yes')
stats.cty.1981 <- c(stats.cty.1981$overall, stats.cty.1981$byClass) stats.cty
.2009 <-
data.ctypredict(f.cty.2009$finalModel, df.test.cty.2009, type = 'prob') %>%
%>%
as.data.frame transmute(pred = yes, obs = df.test.cty.2009$y) %>%
arrange(pred)
.2009 <-
metrics.ctyforeach (i = (0:100)/100, .combine = 'rbind') %do% {
::confusionMatrix(
caretdata = factor(ifelse(data.cty.2009$pred > i, 'yes', 'no'), levels = c('no','yes')),
reference = data.cty.2009$obs,
positive = 'yes')$byClass %>% c(i = i)
%>% data.frame
}
.2009 <- pr.curve(
pr.auc.ctyscores.class0 = data.cty.2009$pred,
weights.class0 = data.cty.2009$obs=='yes')$auc.integral
.2009 <- roc(response = data.cty.2009$obs=='yes', predictor = data.cty.2009$pred)$auc %>% toNumber
roc.auc.cty.2009 <- confusionMatrix(predict(f.cty.2009$finalModel, df.test.cty.2009), df.test.cty.2009$y, positive = 'yes')
stats.cty.2009 <- c(stats.cty.2009$overall, stats.cty.2009$byClass) stats.cty
.1981 <-
data.sacpredict(f.sac.1981$finalModel, df.test.sac.1981, type = 'prob') %>%
%>%
as.data.frame transmute(pred = yes, obs = df.test.sac.1981$y) %>%
arrange(pred)
.1981 <-
metrics.sacforeach (i = (0:100)/100, .combine = 'rbind') %do% {
::confusionMatrix(
caretdata = factor(ifelse(data.sac.1981$pred > i, 'yes', 'no'), levels = c('no','yes')),
reference = data.sac.1981$obs,
positive = 'yes')$byClass %>% c(i = i)
%>% data.frame
}
.1981 <- pr.curve(
pr.auc.sacscores.class0 = data.sac.1981$pred,
weights.class0 = data.sac.1981$obs=='yes')$auc.integral
.1981 <- roc(response = data.sac.1981$obs=='yes', predictor = data.sac.1981$pred)$auc %>% toNumber
roc.auc.sac.1981 <- confusionMatrix(predict(f.sac.1981$finalModel, df.test.sac.1981), df.test.sac.1981$y, positive = 'yes')
stats.sac.1981 <- c(stats.sac.1981$overall, stats.sac.1981$byClass) stats.sac
.2009 <-
data.sacpredict(f.sac.2009$finalModel, df.test.sac.2009, type = 'prob') %>%
%>%
as.data.frame transmute(pred = yes, obs = df.test.sac.2009$y) %>%
arrange(pred)
.2009 <-
metrics.sacforeach (i = (0:100)/100, .combine = 'rbind') %do% {
::confusionMatrix(
caretdata = factor(ifelse(data.sac.2009$pred > i, 'yes', 'no'), levels = c('no','yes')),
reference = data.sac.2009$obs,
positive = 'yes')$byClass %>% c(i = i)
%>% data.frame
}
.2009 <- pr.curve(
pr.auc.sacscores.class0 = data.sac.2009$pred,
weights.class0 = data.sac.2009$obs=='yes')$auc.integral
.2009 <- roc(response = data.sac.2009$obs=='yes', predictor = data.sac.2009$pred)$auc %>% toNumber
roc.auc.sac.2009 <- confusionMatrix(predict(f.sac.2009$finalModel, df.test.sac.2009), df.test.sac.2009$y, positive = 'yes')
stats.sac.2009 <- c(stats.sac.2009$overall, stats.sac.2009$byClass) stats.sac
<-
stats.all rbind(
data.frame(t(stats.cty.1981)) %>% cbind(spat = 'Statewide', temp = '1981-2021'),
data.frame(t(stats.cty.2009)) %>% cbind(spat = 'Statewide', temp = '2009-2021'),
data.frame(t(stats.sac.1981)) %>% cbind(spat = 'Sacramento', temp = '1981-2021'),
data.frame(t(stats.sac.2009)) %>% cbind(spat = 'Sacramento', temp = '2009-2021')) %>%
select(Accuracy, Sensitivity, Specificity, Precision, Recall, Balanced.Accuracy, spat, temp) %>%
mutate(
PR.AUC = c(pr.auc.cty.1981, pr.auc.cty.2009, pr.auc.sac.1981, pr.auc.sac.2009),
ROC.AUC = c(roc.auc.cty.1981, roc.auc.cty.2009, roc.auc.sac.1981, roc.auc.sac.2009),
PR.null = c(
sum(df.test.cty.1981$y=='yes')/nrow(df.test.cty.1981),
sum(df.test.cty.2009$y=='yes')/nrow(df.test.cty.2009),
sum(df.test.sac.1981$y=='yes')/nrow(df.test.sac.1981),
sum(df.test.sac.2009$y=='yes')/nrow(df.test.sac.2009)),
ROC.null = rep(0.5,4),
Balanced.null = rep(0.5,4))
<-
metrics.all rbind(
.1981 %>% mutate(spat = 'Statewide', temp = '1981-2021'),
metrics.cty.2009 %>% mutate(spat = 'Statewide', temp = '2009-2021'),
metrics.cty.1981 %>% mutate(spat = 'Sacramento', temp = '1981-2021'),
metrics.sac.2009 %>% mutate(spat = 'Sacramento', temp = '2009-2021')) %>%
metrics.sacmutate(xlab = paste(spat, temp, sep = '\n'))
<- ggplot(stats.all) +
g2 geom_hline(
aes(yintercept = PR.null, color = spat, alpha = temp),
linetype = 'dashed', show.legend = FALSE) +
geom_line(
data = metrics.all,
aes(x = Recall, y = Precision, group = xlab, color = spat, alpha = temp),
size = 0.75) +
geom_point(
aes(x = Recall, y = Precision, color = spat, alpha = temp),
size = 2, show.legend = FALSE) +
scale_x_continuous(limits = c(0,1), expand = expansion(mult = c(0,0.01))) +
scale_y_continuous(limits = c(0,1), expand = expansion(mult = c(0,0.01))) +
scale_color_manual('Spatial\nResolution', values = c(saccol, ctycol)) +
scale_alpha_manual('Temporal\nResolution', values = c(1,0.4)) +
guides(
color = guide_legend(override.aes = list(size = 1.25)),
alpha = guide_legend(override.aes = list(size = 1.25, alpha = c(1,0.25)))) +
coord_fixed(clip = 'off') +
theme(
text = element_text(family = 'Segoe UI', size = 9),
panel.grid.major = element_line(size = 0.25),
legend.position = 'bottom')
<- ggplot(stats.all) +
g1 geom_line(
data = metrics.all,
aes(x = Specificity, y = Sensitivity, group = xlab, color = spat, alpha = temp),
size = 0.75) +
geom_point(
aes(x = Specificity, y = Sensitivity, color = spat, alpha = temp),
size = 2, show.legend = FALSE) +
scale_x_reverse(limits = c(1,0), expand = expansion(mult = c(0,0.01))) +
scale_y_continuous(limits = c(0,1), expand = expansion(mult = c(0,0.01))) +
geom_abline(slope = 1, intercept = 1, linetype = 'dashed') +
scale_color_manual('Spatial\nResolution', values = c(saccol, ctycol)) +
scale_alpha_manual('Temporal\nResolution', values = c(1,0.4)) +
guides(
color = guide_legend(override.aes = list(size = 1.25)),
alpha = guide_legend(override.aes = list(size = 1.25, alpha = c(1,0.25)))) +
coord_fixed(clip = 'off') +
theme(
text = element_text(family = 'Segoe UI', size = 9),
panel.grid.major = element_line(size = 0.25),
legend.position = 'bottom')
+ g2 + guide_area() +
g1 plot_layout(design = 'ab\ncc', heights = c(100,1), guides = 'collect') +
plot_annotation(tag_levels = 'a', tag_prefix = '(', tag_suffix = ')') &
theme(
legend.background = element_blank(),
plot.tag = element_text(face = 'bold'), legend.box = 'horizontal')
ggsave('_figures/fig1_performance.png', width = 6, height = 3)
%>%
stats.all select(spat, temp, ROC.AUC, ROC.null, PR.AUC, PR.null, Balanced.Accuracy, Balanced.null) %>%
%>%
gt fmt_markdown(c(spat,temp)) %>%
fmt_number(c(starts_with('PR'), starts_with('ROC'), starts_with('Balanced')), decimals = 3) %>%
tab_spanner(label = 'ROC-AUC', columns = c(ROC.AUC, ROC.null)) %>%
tab_spanner(label = 'PR-AUC', columns = c(PR.AUC, PR.null)) %>%
tab_spanner(label = html('Balanced<br>Accuracy'), columns = c(Balanced.Accuracy, Balanced.null)) %>%
cols_label(
spat = html('Spatial<br>Scale'),
temp = html('Temporal<br>Scale'),
ROC.AUC = 'RF',
ROC.null = 'Null',
PR.AUC = 'RF',
PR.null = 'Null',
Balanced.Accuracy = 'RF',
Balanced.null = 'Null'
%>%
) tab_header('Random Forest Model Performance Metrics') %>%
tab_options(
heading.background.color = '#d9d9d9',
column_labels.background.color = '#f2f2f2')
Random Forest Model Performance Metrics | |||||||
---|---|---|---|---|---|---|---|
Spatial Scale |
Temporal Scale |
ROC-AUC | PR-AUC |
Balanced Accuracy |
|||
RF | Null | RF | Null | RF | Null | ||
Statewide |
1981-2021 |
0.914 | 0.500 | 0.435 | 0.051 | 0.707 | 0.500 |
Statewide |
2009-2021 |
0.914 | 0.500 | 0.353 | 0.041 | 0.657 | 0.500 |
Sacramento |
1981-2021 |
0.959 | 0.500 | 0.311 | 0.007 | 0.761 | 0.500 |
Sacramento |
2009-2021 |
0.898 | 0.500 | 0.046 | 0.002 | 0.673 | 0.500 |
.1981 <- shap.cty.1981 %>%
plotimp.cty%>% select(-y) %>%
as.data.frame summarize(across(everything(), ~mean(abs(.)))) %>%
pivot_longer(everything(), names_to = 'varnames', values_to = 'shap') %>%
left_join(dimension, by = c('varnames' = 'var')) %>%
arrange(shap) %>%
filter(shap >= shap[varnames=='noise']) %>%
mutate(
rank = nrow(.):1,
varnames = factor(varnames, levels = varnames),
fullname = factor(fullname, levels = fullname),
shap = c(0, shap[-1]),
dim = factor(dim, levels = c('H','E','V')))
.2009 <- shap.cty.2009 %>%
plotimp.cty%>% select(-y) %>%
as.data.frame summarize(across(everything(), ~mean(abs(.)))) %>%
pivot_longer(everything(), names_to = 'varnames', values_to = 'shap') %>%
left_join(dimension, by = c('varnames' = 'var')) %>%
arrange(shap) %>%
filter(shap >= shap[varnames=='noise']) %>%
mutate(
rank = nrow(.):1,
varnames = factor(varnames, levels = varnames),
fullname = factor(fullname, levels = fullname),
shap = c(0, shap[-1]),
dim = factor(dim, levels = c('H','E','V')))
.1981 <- shap.sac.1981 %>%
plotimp.sac%>% select(-y) %>%
as.data.frame summarize(across(everything(), ~mean(abs(.)))) %>%
pivot_longer(everything(), names_to = 'varnames', values_to = 'shap') %>%
left_join(dimension, by = c('varnames' = 'var')) %>%
arrange(shap) %>%
filter(shap >= shap[varnames=='noise']) %>%
mutate(
rank = nrow(.):1,
varnames = factor(varnames, levels = varnames),
fullname = factor(fullname, levels = fullname),
shap = c(0, shap[-1]),
dim = factor(dim, levels = c('H','E','V')))
.2009 <- shap.sac.2009 %>%
plotimp.sac%>% select(-y) %>%
as.data.frame summarize(across(everything(), ~mean(abs(.)))) %>%
pivot_longer(everything(), names_to = 'varnames', values_to = 'shap') %>%
left_join(dimension, by = c('varnames' = 'var')) %>%
arrange(shap) %>%
filter(shap >= shap[varnames=='noise']) %>%
mutate(
rank = nrow(.):1,
varnames = factor(varnames, levels = varnames),
fullname = factor(fullname, levels = fullname),
shap = c(0, shap[-1]),
dim = factor(dim, levels = c('H','E','V')))
<-
plotimp.all rbind(
.1981 %>% mutate(spat = 'Statewide', temp = '1981-2021'),
plotimp.cty.2009 %>% mutate(spat = 'Statewide', temp = '2009-2021'),
plotimp.cty.1981 %>% mutate(spat = 'Sacramento', temp = '1981-2021'),
plotimp.sac.2009 %>% mutate(spat = 'Sacramento', temp = '2009-2021')) %>%
plotimp.sacfilter(varnames != 'noise')
<- plotimp.all %>%
df group_by(spat,temp,dim,concept) %>%
summarize(imptce = Sum(shap)) %>%
group_by(spat,temp) %>%
mutate(imptce = imptce/sum(imptce)) %>%
ungroup<- df %>%
df group_by(spat,temp,dim) %>%
summarize(imptce.hev = Sum(imptce)) %>%
mutate(dim = factor(dim, levels = c('H','E','V'))) %>%
arrange(rev(dim)) %>%
mutate(
start = cumsum(imptce.hev),
end = c(0, start[-length(dim)]),
lab = cbind(start,end) %>% apply(1,mean)) %>%
%>%
ungroup select(-start, -end) %>%
left_join(
%>% select(spat,temp,dim,concept,imptce),
df by = c('spat','temp','dim')) %>%
group_by(spat,temp,dim) %>%
mutate(
id = 1:length(dim),
lab = case_when(id==1 ~ lab)) %>%
select(-id) %>%
%>%
ungroup group_by(dim) %>% arrange(factor(concept)) %>%
mutate(
conceptid = paste(dim, concept, sep = '-') %>%
factor(levels = rev(c(
'H-ARchar','H-antecedent','H-modes-lsm','E-people','E-housing',
'E-indicators','V-social','V-infra','V-memory')))) %>%
arrange(conceptid) %>%
mutate(xlab = paste(spat, temp, sep = '\n')) %>%
mutate(xlab = factor(xlab, levels = rev(levels(factor(xlab))))) %>%
%>%
ungroup mutate(
xlab = paste(spat,temp,sep = '\n') %>% factor(
levels = c('Statewide\n1981-2021', 'Statewide\n2009-2021', 'Sacramento\n1981-2021', 'Sacramento\n2009-2021')))
ggplot(df) +
geom_col(
aes(x = as.numeric(xlab), y = imptce, group = dim),
width = 0.75, fill = 'white', color = NA) +
geom_col(
aes(x = as.numeric(xlab), y = imptce, group = dim, fill = dim, alpha = conceptid),
width = 0.75) +
geom_col(
data = df %>%
group_by(spat,temp,dim) %>%
summarize(
imptce.hev = sum(imptce), dim = dim[1], xlab = xlab[1]) %>%
ungroup,aes(x = as.numeric(xlab), y = imptce.hev, group = dim),
width = 0.75, size = 0.25, fill = NA, color = 'grey10') +
geom_text(
aes(x = as.numeric(xlab), y = lab, label = percent(imptce.hev, accuracy=1)),
family = 'Segoe UI', fontface = 'bold', size = 9/.pt) +
geom_text_repel(
data = df %>%
filter(spat == 'Statewide' & temp == '1981-2021') %>%
mutate(
start = cumsum(imptce),
end = c(0, start[-length(concept)]),
lab = cbind(start,end) %>% apply(1,mean),
concept_name = case_match(
concept, 'ARchar' ~ 'AR Characteristics',
'antecedent' ~ 'Antecedent\nConditions',
'modes-lsm' ~ 'Climate Modes +\nLand Surface',
'people' ~ 'Population',
'housing' ~ 'Housing',
'indicators' ~ 'Insurance Takeup',
'social' ~ 'Social Vulnerability',
'infra' ~ 'Infrastructural\nVulnerability',
'memory' ~ 'Flood Experience')),
aes(x = as.numeric(xlab)-0.75/2, y = lab, label = concept_name),
direction = 'y', min.segment.length = 0,
family = 'Segoe UI', size = 8/.pt, hjust = 1, nudge_x = -0.225, lineheight = 0.8) +
scale_fill_manual(
'Dimension',
breaks = c('H','E','V'),
labels = c('Hazard','Exposure','Vulnerability'),
values = colors.hev) +
scale_alpha_manual(
'Dimension',
values = c(1,0.8,0.6,1,0.75,0.5,1,0.7,0.4),
guide = guide_none()) +
scale_x_continuous(
breaks = 1:4, labels = levels(df$xlab),
expand = expansion(add = c(1.25,0.25))) +
scale_y_origin('SHAP Feature Importance') +
theme(
legend.position = 'bottom',
panel.grid.major.y = element_line(size = 0.25),
axis.title.x = element_blank(),
axis.text.x = element_text(size = 9, color = 'black'),
legend.box.margin = ggplot2::margin(-5,0,0,0))
ggsave('_figures/fig2_imptce.png', width = 6, height = 4)
<- plotimp.all %>%
df mutate(shap = setNA(shap,0)) %>%
pivot_wider(names_from = c(spat,temp), values_from = c(shap,rank)) %>%
mutate(
across(starts_with('shap'), ~setNA(.,0)),
across(starts_with('rank'), ~setNA(.,30))) %>%
mutate(totalrank = select(., starts_with('rank')) %>% apply(1,sum)) %>%
pivot_longer(
cols = c(starts_with('rank'), starts_with('shap')),
names_to = c('shaprank', 'spat', 'temp'), names_sep = '_') %>%
pivot_wider(names_from = shaprank, values_from = value) %>%
arrange(desc(totalrank)) %>%
mutate(
fullname = factor(fullname, levels = unique(fullname)),
xlab = paste(spat,temp,sep = '\n') %>% factor(
levels = c('Statewide\n1981-2021', 'Statewide\n2009-2021', 'Sacramento\n1981-2021', 'Sacramento\n2009-2021')))
ggplot(df) +
geom_tile(
aes(x = as.numeric(xlab), y = fullname, fill = shap),
width = 0.9) +
scale_fill_gradient(
'SHAP Feature \nContribution ',
low = 'white', high = '#50245b', label = percent) +
geom_text(
aes(x = 0.85, y = fullname, label = dim, color = dim),
family = 'Segoe UI', size = 9/.pt, fontface = 'bold', nudge_x = -0.575,
show.legend = FALSE) +
scale_color_manual(values = c('#dda077', colors.hev[-1])) +
::new_scale_color() +
ggnewscalegeom_text(
data = df %>% filter(rank <= 10),
aes(x = as.numeric(xlab), y = fullname, label = rank, color = rank<=5),
family = 'Segoe UI', size = 8/.pt,
show.legend = FALSE) +
scale_color_manual(values = c('black','white')) +
scale_x_continuous(
breaks = 1:4, labels = levels(df$xlab),
position = 'top', expand = expansion(add = c(0.15,0.25))) +
scale_y_discrete(expand = expansion(0.035)) +
theme(
axis.title = element_blank(),
axis.text.x = element_text(size = 9, color = 'black'),
axis.text.y = element_text(size = 8, color = 'black'),
axis.line = element_blank(),
panel.background = element_rect(fill = 'white', color = 'grey70'),
legend.position = 'bottom',
legend.box.margin = ggplot2::margin(-5,0,0,0),
legend.key.height = unit(10, 'pt'),
legend.key.width = unit(25, 'pt'))
ggsave('_figures/fig3_rank_new.png', width = 6, height = 5)
## set plot parameters
<- 0.5
ptsize <- 0.75
linesize <- 'grey50'
ptcol
<- c(-0.3, 0.25)
rangehaz <- c(-0.15, 0.1)
rangeexp <- c(-0.15, 0.1)
rangevuln
<- ggplot() +
gbase geom_hline(yintercept = 0, color = 'grey50', size = 0.25) +
scale_y_continuous(
'SHAP Feature Contribution',
labels = percent,
sec.axis = sec_axis(~., name = 'Accumulated Local Effects', labels = percent)) +
theme(
text = element_text(size = 9),
panel.grid.major.y = element_line(size = 0.25),
axis.line.y.right = element_line(),
axis.ticks.y.right = element_line(),
axis.title.y.left = element_blank(),
axis.title.y.right = element_blank(),
axis.text.y.left = element_blank(),
axis.text.y.right = element_blank())
.1981 %>%
plotimp.ctyfilter(dim == 'H') %>% arrange(desc(shap)) %>%
head(3) %>% pull(varnames) %>% paste
## [1] "logprecip_total" "maxivt" "duration"
<- 'logprecip_total'
xname <- gbase +
g1h geom_point(
data = transfm(df.pca.cty.1981, xname) %>% slice(samp.cty.1981) %>%
mutate(y = shap.cty.1981 %>% data.frame %>% slice(samp.cty.1981) %>% pull(xname)),
aes(x = x, y = y), size = ptsize, color = ptcol, alpha = 0.25) +
geom_line(
data = ale.cty.1981 %>%
filter(.feature == xname) %>%
mutate(.borders = toNumber(.borders)) %>%
filter(
> quantile(df.pca.cty.1981[,xname],0.025) &
.borders < quantile(df.pca.cty.1981[,xname],0.975)) %>%
.borders mutate(x = ifelse(grepl('log',.feature), 10^.borders-1, .borders)),
aes(x = x, y = .value), size = linesize) +
labs(
title = 'Hazard',
x = dimension$fullname[dimension$var==xname] %>% str_wrap(width = 20)) +
scale_x_continuous(expand = expansion(mult = c(0.02,0.05))) +
coord_cartesian(xlim = c(0,250), ylim = rangehaz) +
theme(
plot.title = element_text(face = 'bold', color = '#dda077'),
axis.text.y.left = element_text())
<- 'maxivt'
xname <- gbase +
g2h geom_point(
data = transfm(df.pca.cty.1981, xname) %>% slice(samp.cty.1981) %>%
mutate(y = shap.cty.1981 %>% data.frame %>% slice(samp.cty.1981) %>% pull(xname)),
aes(x = x, y = y), size = ptsize, color = ptcol, alpha = 0.25) +
geom_line(
data = ale.cty.1981 %>%
filter(.feature == xname) %>%
mutate(.borders = toNumber(.borders)) %>%
filter(
> quantile(df.pca.cty.1981[,xname],0.025) &
.borders < quantile(df.pca.cty.1981[,xname],0.975)) %>%
.borders mutate(x = ifelse(grepl('log',.feature), 10^.borders-1, .borders)),
aes(x = x, y = .value), size = linesize) +
coord_cartesian(xlim = c(250,NA), ylim = rangehaz) +
labs(
x = dimension$fullname[dimension$var==xname] %>% str_wrap(width = 20)) +
scale_x_continuous(breaks = 250*(0:10), expand = expansion(mult = c(0,0.05)))
<- 'duration'
xname <- gbase +
g3h geom_point(
data = transfm(df.pca.cty.1981, xname) %>% slice(samp.cty.1981) %>%
mutate(y = shap.cty.1981 %>% data.frame %>% slice(samp.cty.1981) %>% pull(xname)),
aes(x = x, y = y), size = ptsize, color = ptcol, alpha = 0.25) +
geom_line(
data = ale.cty.1981 %>%
filter(.feature == xname) %>%
mutate(.borders = toNumber(.borders)) %>%
filter(
> quantile(df.pca.cty.1981[,xname],0.025) &
.borders < quantile(df.pca.cty.1981[,xname],0.975)) %>%
.borders mutate(x = ifelse(grepl('log',.feature), 10^.borders-1, .borders)),
aes(x = x, y = .value), size = linesize) +
labs(
x = dimension$fullname[dimension$var==xname] %>% str_wrap(width = 20)) +
scale_x_continuous(expand = expansion(mult = c(0.02,0.05))) +
coord_cartesian(xlim = c(0,150), ylim = rangehaz) +
theme(
axis.text.y.right = element_text())
.1981 %>%
plotimp.ctyfilter(dim == 'E') %>% arrange(desc(shap)) %>%
head(3) %>% pull(varnames) %>% paste
## [1] "loghu" "pop_pct_floodplain" "pct_sfh"
<- 'loghu'
xname <- gbase +
g1e geom_point(
data = transfm(df.pca.cty.1981, xname) %>% slice(samp.cty.1981) %>%
mutate(y = shap.cty.1981 %>% data.frame %>% slice(samp.cty.1981) %>% pull(xname)),
aes(x = x, y = y), size = ptsize, color = ptcol, alpha = 0.25) +
geom_line(
data = ale.cty.1981 %>%
filter(.feature == xname) %>%
mutate(.borders = toNumber(.borders)) %>%
filter(
> quantile(df.pca.cty.1981[,xname],0.025) &
.borders < quantile(df.pca.cty.1981[,xname],0.975)) %>%
.borders mutate(x = ifelse(grepl('log',.feature), 10^.borders-1, .borders)),
aes(x = x, y = .value), size = linesize) +
labs(
title = 'Exposure',
x = dimension$fullname[dimension$var==xname] %>% str_wrap(width = 20)) +
scale_x_log10(
breaks = 10^(1:10),
labels = c('10','100','1K','10K','100K','1M','10M','100M','1B','10B')) +
annotation_logticks(sides = 'b', size = 0.25, color = 'grey25') +
coord_cartesian(ylim = rangeexp) +
theme(
plot.title = element_text(face = 'bold', color = '#71938f'),
axis.title.y.left = element_text(),
axis.text.y.left = element_text())
<- 'pop_pct_floodplain'
xname <- gbase +
g2e geom_point(
data = transfm(df.pca.cty.1981, xname) %>% slice(samp.cty.1981) %>%
mutate(y = shap.cty.1981 %>% data.frame %>% slice(samp.cty.1981) %>% pull(xname)),
aes(x = x, y = y), size = ptsize, color = ptcol, alpha = 0.25) +
geom_line(
data = ale.cty.1981 %>%
filter(.feature == xname) %>%
mutate(.borders = toNumber(.borders)) %>%
filter(
> quantile(df.pca.cty.1981[,xname],0.025) &
.borders < quantile(df.pca.cty.1981[,xname],0.975)) %>%
.borders mutate(x = ifelse(grepl('log',.feature), 10^.borders-1, .borders)),
aes(x = x, y = .value), size = linesize) +
labs(
x = dimension$fullname[dimension$var==xname] %>% str_wrap(width = 20)) +
scale_x_continuous(labels = percent, expand = expansion(mult = c(0.02,0.05))) +
coord_cartesian(xlim = c(0,0.4), ylim = rangeexp)
<- 'pct_sfh'
xname <- gbase +
g3e geom_point(
data = transfm(df.pca.cty.1981, xname) %>% slice(samp.cty.1981) %>%
mutate(y = shap.cty.1981 %>% data.frame %>% slice(samp.cty.1981) %>% pull(xname)),
aes(x = x, y = y), size = ptsize, color = ptcol, alpha = 0.25) +
geom_line(
data = ale.cty.1981 %>%
filter(.feature == xname) %>%
mutate(.borders = toNumber(.borders)) %>%
filter(
> quantile(df.pca.cty.1981[,xname],0.025) &
.borders < quantile(df.pca.cty.1981[,xname],0.975)) %>%
.borders mutate(x = ifelse(grepl('log',.feature), 10^.borders-1, .borders)),
aes(x = x, y = .value), size = linesize) +
labs(
x = dimension$fullname[dimension$var==xname] %>% str_wrap(width = 24)) +
scale_x_continuous(labels = percent, expand = expansion(mult = 0.05)) +
coord_cartesian(xlim = c(0.4,NA), ylim = rangeexp) +
theme(
axis.title.y.right = element_text(),
axis.text.y.right = element_text())
.1981 %>%
plotimp.ctyfilter(dim == 'V') %>% arrange(desc(shap)) %>%
head(3) %>% pull(varnames) %>% paste
## [1] "CRS" "hhincome22" "cal_polburd"
<- 'CRS'
xname <- gbase +
g1v geom_point(
data = transfm(df.pca.cty.1981, xname) %>% slice(samp.cty.1981) %>%
mutate(y = shap.cty.1981 %>% data.frame %>% slice(samp.cty.1981) %>% pull(xname)),
aes(x = jitter(x), y = y), size = ptsize, color = ptcol, alpha = 0.25) +
geom_line(
data = ale.cty.1981 %>%
filter(.feature == xname) %>%
mutate(.borders = toNumber(.borders)) %>%
filter(.borders > 2) %>%
mutate(x = ifelse(grepl('log',.feature), 10^.borders-1, .borders)),
aes(x = x, y = .value), size = linesize) +
labs(
title = 'Vulnerability',
x = dimension$fullname[dimension$var==xname] %>% str_wrap(width = 20)) +
coord_cartesian(ylim = rangevuln) +
scale_x_continuous(breaks = 2:10) +
theme(
plot.title = element_text(face = 'bold', color = colors.hev[3]),
axis.text.y.left = element_text())
<- 'hhincome22'
xname <- gbase +
g2v geom_point(
data = transfm(df.pca.cty.1981, xname) %>% slice(samp.cty.1981) %>%
mutate(y = shap.cty.1981 %>% data.frame %>% slice(samp.cty.1981) %>% pull(xname)),
aes(x = x, y = y), size = ptsize, color = ptcol, alpha = 0.25) +
geom_line(
data = ale.cty.1981 %>%
filter(.feature == xname) %>%
mutate(.borders = toNumber(.borders)) %>%
filter(
> quantile(df.pca.cty.1981[,xname],0.025) &
.borders < quantile(df.pca.cty.1981[,xname],0.975)) %>%
.borders mutate(x = ifelse(grepl('log',.feature), 10^.borders-1, .borders)),
aes(x = x, y = .value), size = linesize) +
labs(
x = dimension$fullname[dimension$var==xname] %>% str_wrap(width = 20)) +
scale_x_continuous(labels = comma_format(scale = 1e-3, prefix = '$', suffix = 'K')) +
coord_cartesian(ylim = rangevuln)
<- 'cal_polburd'
xname <- gbase +
g3v geom_point(
data = transfm(df.pca.cty.1981, xname) %>% slice(samp.cty.1981) %>%
mutate(y = shap.cty.1981 %>% data.frame %>% slice(samp.cty.1981) %>% pull(xname)),
aes(x = x, y = y), size = ptsize, color = ptcol, alpha = 0.25) +
geom_line(
data = ale.cty.1981 %>%
filter(.feature == xname) %>%
mutate(.borders = toNumber(.borders)) %>%
filter(
> quantile(df.pca.cty.1981[,xname],0.025) &
.borders < quantile(df.pca.cty.1981[,xname],0.975)) %>%
.borders mutate(x = ifelse(grepl('log',.feature), 10^.borders-1, .borders)),
aes(x = x, y = .value), size = linesize) +
labs(
x = dimension$fullname[dimension$var==xname] %>% str_wrap(width = 20)) +
coord_cartesian(ylim = rangevuln) +
theme(
axis.text.y.right = element_text())
+ g2h + g3h +
g1h + g2e + g3e +
g1e + g2v + g3v +
g1v plot_layout(nrow = 3) +
plot_annotation(tag_levels = 'a', tag_prefix = '(', tag_suffix = ')') &
theme(
legend.position = 'none',
plot.tag = element_text(face = 'bold'),
plot.tag.position = c(0.12,0.975)) #1.01 without the title
ggsave('_figures/fig4_impact.png', width = 6, height = 6)
<- df.pca.cty.1981 %>% filter(cat==1)
df.subset <- Predictor$new(
predictor1 .1981$finalModel,
f.ctydata = df.subset,
y = df.subset$y=='yes',
class = 'yes',
type = 'prob')
<- df.pca.cty.1981 %>% filter(cat==2)
df.subset <- Predictor$new(
predictor2 .1981$finalModel,
f.ctydata = df.subset,
y = df.subset$y=='yes',
class = 'yes',
type = 'prob')
<- df.pca.cty.1981 %>% filter(cat==3)
df.subset <- Predictor$new(
predictor3 .1981$finalModel,
f.ctydata = df.subset,
y = df.subset$y=='yes',
class = 'yes',
type = 'prob')
<- df.pca.cty.1981 %>% filter(cat==4)
df.subset <- Predictor$new(
predictor4 .1981$finalModel,
f.ctydata = df.subset,
y = df.subset$y=='yes',
class = 'yes',
type = 'prob')
<- df.pca.cty.1981 %>% filter(cat==5)
df.subset <- Predictor$new(
predictor5 .1981$finalModel,
f.ctydata = df.subset,
y = df.subset$y=='yes',
class = 'yes',
type = 'prob')
.1981 <-
bycat.ctyforeach (
xname = plotimp.cty.1981 %>% filter(dim %in% c('E','V')) %>% pull(varnames) %>% paste,
.combine = 'rbind') %do% {
<- rbind(
df $new(predictor1, feature = xname, method = 'ale', grid.size = 50) %>%
FeatureEffect$results %>% setNames(c('.type', '.value', '.borders')) %>%
.mutate(.borders = toNumber(.borders), .feature = xname, cat = 1),
$new(predictor2, feature = xname, method = 'ale', grid.size = 50) %>%
FeatureEffect$results %>% setNames(c('.type', '.value', '.borders')) %>%
.mutate(.borders = toNumber(.borders), .feature = xname, cat = 2),
$new(predictor3, feature = xname, method = 'ale', grid.size = 50) %>%
FeatureEffect$results %>% setNames(c('.type', '.value', '.borders')) %>%
.mutate(.borders = toNumber(.borders), .feature = xname, cat = 3),
$new(predictor4, feature = xname, method = 'ale', grid.size = 50) %>%
FeatureEffect$results %>% setNames(c('.type', '.value', '.borders')) %>%
.mutate(.borders = toNumber(.borders), .feature = xname, cat = 4),
$new(predictor5, feature = xname, method = 'ale', grid.size = 50) %>%
FeatureEffect$results %>% setNames(c('.type', '.value', '.borders')) %>%
.mutate(.borders = toNumber(.borders), .feature = xname, cat = 5))
if (xname %in% c('CRS', 'disasters', 'coastal')) df else {
%>%
df filter(
> quantile(df.pca.cty.1981[,xname],0.025) &
.borders < quantile(df.pca.cty.1981[,xname],0.975))
.borders
} }
.1981 %>%
bycat.ctyfilter(!(.feature %in% c(
'disasters', 'coastal', 'pct_white_nonhisp', 'pct_working', 'pct_mobile', 'pct_ownocc',
'med_struct_age', 'hhincome22', 'cdc_theme4', 'cal_popchar', 'pop_pct_floodplain'))) %>%
left_join(dimension, by = c('.feature' = 'var')) %>%
ggplot() +
geom_hline(yintercept = 0, color = 'grey50', size = 0.25) +
geom_line(aes(x = .borders, y = .value, group = cat, color = factor(cat)), size = 1) +
geom_rug(
data = df.pca.cty.1981 %>%
select(names(.)[names(.) %in% (dimension %>% filter(dim %in% c('E','V')) %>% pull(var))]) %>%
slice(sample(1:nrow(.),1000)) %>%
pivot_longer(everything(), names_to = '.feature', values_to = 'x') %>%
left_join(dimension, by = c('.feature' = 'var')) %>%
filter(!(.feature %in% c(
'disasters', 'coastal', 'pct_white_nonhisp', 'pct_working', 'pct_mobile', 'pct_ownocc',
'med_struct_age', 'hhincome22', 'cdc_theme4', 'cal_popchar', 'pop_pct_floodplain'))),
aes(x = x)) +
facet_wrap(~paste(.feature,dim), scales = 'free') +
scale_color_manual('AR Category', values = rev(roma.colors)) +
theme(
legend.position = 'bottom',
panel.grid.major.y = element_line(size = 0.25))
<- c(-0.24,0.12)
yrange <- ggplot() +
gbase geom_hline(yintercept = 0, color = 'grey50', size = 0.25) +
scale_y_continuous(labels = percent, expand = expansion(0.025,0.035), limits = yrange) +
scale_color_manual('AR Category', values = rev(roma.colors)) +
labs(y = 'Accumulated Local Effects') +
theme(
text = element_text(size = 9),
legend.position = 'bottom',
legend.background = element_blank(),
legend.box.background = element_blank(),
legend.margin = ggplot2::margin(0,0,-5,0),
panel.grid.major.y = element_line(size = 0.25),
plot.tag = element_text(size = 10, face = 'bold'),
plot.tag.position = c(0.025,0.975),
plot.margin = ggplot2::margin(16,2,2,2))
<- 'loghu'
xname <- gbase +
g1 geom_smooth(
data = bycat.cty.1981 %>%
filter(.feature == xname) %>%
left_join(dimension, by = c('.feature' = 'var')) %>%
select(.borders, .value, cat) %>%
setNames(c(xname,'y','cat')) %>% transfm(.,xname),
aes(x = x, y = y, group = factor(cat), color = factor(cat)),
size = 0.75, se = FALSE, span = 0.25) +
scale_x_log10(
breaks = 10^(1:10),
labels = c('10','100','1K','10K','100K','1M','10M','100M','1B','10B')) +
labs(
title = ' Exposure',
x = dimension$fullname[dimension$var==xname] %>% str_wrap(width = 18)) +
geom_rug(
data = transfm(df.pca.cty.1981, xname) %>% slice(samp.cty.1981) %>% select(x),
aes(x = x), size = 0.25, alpha = 0.25) +
theme(
plot.title = element_text(face = 'bold', color = '#71938f'),
plot.tag.position = c(0.25,0.975))
<- 'pct_sfh'
xname <- gbase +
g2 geom_smooth(
data = bycat.cty.1981 %>%
filter(.feature == xname) %>%
left_join(dimension, by = c('.feature' = 'var')) %>%
select(.borders, .value, cat) %>%
setNames(c(xname,'y','cat')) %>% transfm(.,xname),
aes(x = x, y = y, group = factor(cat), color = factor(cat)),
size = 0.75, se = FALSE, span = 0.25) +
scale_x_continuous(labels = percent, limits = c(0.4,NA)) +
labs(x = dimension$fullname[dimension$var==xname] %>% str_wrap(width = 18)) +
geom_rug(
data = transfm(df.pca.cty.1981, xname) %>% slice(samp.cty.1981) %>% select(x),
aes(x = x), size = 0.25, alpha = 0.25) +
theme(axis.title.y = element_blank(), axis.text.y = element_blank())
<- 'cal_polburd'
xname <- gbase +
g3 geom_smooth(
data = bycat.cty.1981 %>%
filter(.feature == xname) %>%
left_join(dimension, by = c('.feature' = 'var')) %>%
select(.borders, .value, cat) %>%
setNames(c(xname,'y','cat')) %>% transfm(.,xname),
aes(x = x, y = y, group = factor(cat), color = factor(cat)),
size = 0.75, se = FALSE, span = 0.25) +
scale_x_continuous() +
labs(
title = ' Vulnerability',
x = dimension$fullname[dimension$var==xname] %>% str_wrap(width = 20)) +
geom_rug(
data = transfm(df.pca.cty.1981, xname) %>% slice(samp.cty.1981) %>% select(x),
aes(x = x), size = 0.25, alpha = 0.25) +
theme(
plot.title = element_text(face = 'bold', color = colors.hev[3]),
axis.title.y = element_blank(), axis.text.y = element_blank())
<- 'CRS'
xname <- gbase +
g4 geom_line(
data = bycat.cty.1981 %>%
filter(.feature == xname) %>%
left_join(dimension, by = c('.feature' = 'var')) %>%
select(.borders, .value, cat) %>%
setNames(c(xname,'y','cat')) %>% transfm(.,xname),
aes(x = x, y = y, group = factor(cat), color = factor(cat)),
size = 0.75, show.legend = FALSE) +
scale_x_continuous(breaks = 2:10) +
labs(x = dimension$fullname[dimension$var==xname] %>% str_wrap(width = 18)) +
geom_rug(
data = transfm(df.pca.cty.1981, xname) %>% slice(samp.cty.1981) %>% select(x),
aes(x = jitter(x)), size = 0.25, alpha = 0.25) +
theme(axis.title.y = element_blank(), axis.text.y = element_blank())
+ g2 + g3 + g4 + guide_area() +
g1 plot_layout(design = 'abcd\neeee', guides = 'collect', heights = c(10,1)) +
plot_annotation(tag_levels = 'a', tag_prefix = '(', tag_suffix = ')')
ggsave('_figures/fig5_interaction.png', width = 6.5, height = 2.5)