source('_data/setup.R')
suppressPackageStartupMessages({
require(DMwR)
require(themis)
})
load('_data/_checkpoints/county_catalog_0705.Rdata')
load('_results/county_rfmodel_1981.Rdata')
load('_data/_checkpoints/bg_0417.Rdata')
load('_data/policies_save.Rdata')
<- california %>%
polys arrange(NAME) %>%
transmute(id = 1:nrow(.), name = NAME, geometry)
<- policies %>%
policies.poly transmute(
floodZone,county = str_sub(countyCode, start = 3, end = 5),
tract = str_sub(censusTract, start = 6),
policy_start = ymd_hms(policyEffectiveDate),
policy_end = ymd_hms(policyTerminationDate)) %>%
filter(complete.cases(.)) %>%
filter(ymd('2021-10-01') %within% interval(policy_start, policy_end)) %>%
left_join(california %>% st_drop_geometry %>% select(COUNTYFP,NAME), by = c('county' = 'COUNTYFP')) %>%
left_join(polys %>% st_drop_geometry %>% select(id,name), by = c('NAME' = 'name')) %>%
filter(!is.na(id)) %>%
mutate(NFHL = grepl('A',floodZone) | grepl('V',floodZone))
## calculate observed 2021 housing units by tract
<-
hu_bytract %>% st_drop_geometry %>%
bg transmute(county = COUNTYFP, tract = TRACTCE, pctarea = partarea/area, hu) %>%
mutate(hu_in = hu*pctarea, hu_out = hu*(1-pctarea)) %>%
group_by(county, tract) %>%
summarize(
hu = Sum(hu), hu_in = round(sum(hu_in)), hu_out = round(sum(hu_out)),
.groups = 'drop')
## calculate observed 2021 policies by tract
<- policies.poly %>%
policies_bytract count(county, tract, NFHL) %>%
mutate(temp = case_when(NFHL ~ 'policies_in', TRUE ~ 'policies_out')) %>%
select(-NFHL) %>%
pivot_wider(id_cols = c(county,tract), names_from = temp, values_from = n) %>%
mutate(across(starts_with('policies'), ~setNA(.,0)))
## calculate statewide observed NFIP takeup rates within & outside of the FEMA 100-year floodplain
<- Sum(policies_bytract$policies_in) / Sum(hu_bytract$hu_in)
takeup_in <- Sum(policies_bytract$policies_out) / Sum(hu_bytract$hu_out) takeup_out
## gut check: make sure we can reverse-engineer the correct number of observed policies
## total number of policies
%>% st_drop_geometry %>%
bg transmute(county = COUNTYFP, tract = TRACTCE, pctarea = partarea/area, hu) %>%
mutate(
hu_in = hu*pctarea, hu_out = hu*(1-pctarea),
exp_in = takeup_in*hu_in, exp_out = takeup_out*hu_out) %>%
summarize(exp_in = sum(exp_in), exp_out = sum(exp_out)) %>% sum
Sum(policies_bytract$policies_in) + Sum(policies_bytract$policies_out)
## policies in & outside of the floodplain
<- bg %>% st_drop_geometry %>%
takeup_bytract transmute(county = COUNTYFP, tract = TRACTCE, pctarea = partarea/area, hu) %>%
mutate(
hu_in = hu*pctarea, hu_out = hu*(1-pctarea),
exp_in = takeup_in*hu_in, exp_out = takeup_out*hu_out) %>%
group_by(county, tract) %>%
summarize(
hu = Sum(hu), hu_in = round(sum(hu_in)), hu_out = hu-hu_in,
exp_in = round(sum(exp_in)), exp_out = round(sum(exp_out)), .groups = 'drop') %>%
full_join(policies_bytract, by = c('county', 'tract')) %>%
mutate(across(c(ends_with('in'), ends_with('out')), ~setNA(.,0)))
%>% summarize(across(c(ends_with('in'), ends_with('out')), Sum))
takeup_bytract
## within <1% for both checks -> move on
## [1] 192516.5
## [1] 192514
## # A tibble: 1 x 6
## hu_in exp_in policies_in hu_out exp_out policies_out
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 786049 99317 99326 13542490 93193 93188
## calculate observed 2021 housing units by county
<-
hu_bycounty %>% st_drop_geometry %>%
bg transmute(county = COUNTYFP, pctarea = partarea/area, hu) %>%
mutate(hu_in = hu*pctarea, hu_out = hu*(1-pctarea)) %>%
group_by(county) %>%
summarize(
hu = Sum(hu), hu_in = round(sum(hu_in)), hu_out = round(sum(hu_out)),
.groups = 'drop')
## calculate observed 2021 policies by tract
<-
policies_bycounty %>%
policies.poly count(county, NFHL) %>%
mutate(temp = case_when(NFHL ~ 'policies_in', TRUE ~ 'policies_out')) %>%
select(-NFHL) %>%
pivot_wider(id_cols = county, names_from = temp, values_from = n) %>%
mutate(across(starts_with('policies'), ~setNA(.,0)))
## calculate statewide observed NFIP takeup rates within & outside of the FEMA 100-year floodplain
<- Sum(policies_bycounty$policies_in) / Sum(hu_bycounty$hu_in)
takeup_in <- Sum(policies_bycounty$policies_out) / Sum(hu_bycounty$hu_out) takeup_out
## gut check
Sum(policies_bycounty$policies_in)+Sum(policies_bycounty$policies_out)) /
(Sum(hu_bycounty$hu_in)+Sum(hu_bycounty$hu_out))
(nrow(policies.poly) / Sum(bg$hu)
## [1] 0.0134357
## [1] 0.0134357
<-
takeup_bycounty %>% st_drop_geometry %>%
bg transmute(county = COUNTYFP, pctarea = partarea/area, hu) %>%
mutate(
hu_in = hu*pctarea, hu_out = hu*(1-pctarea),
exp_in = takeup_in*hu_in, exp_out = takeup_out*hu_out) %>%
group_by(county) %>%
summarize(
hu = Sum(hu), hu_in = round(Sum(hu_in)), hu_out = hu-hu_in,
exp_in = round(Sum(exp_in)), exp_out = round(Sum(exp_out)),
.groups = 'drop') %>%
full_join(policies_bycounty, by = 'county') %>%
mutate(across(c(ends_with('in'), ends_with('out')), ~setNA(.,0))) %>%
left_join(
%>% st_drop_geometry %>% select(NAME,COUNTYFP),
california by = c('county' = 'COUNTYFP')) %>%
mutate(hu = hu_in+hu_out, policies = policies_in+policies_out, exp = exp_in+exp_out) %>%
mutate(takeup_obs = policies/hu, takeup_exp = exp/hu)
%>% summarize(across(c(ends_with('in'), ends_with('out')), Sum))
takeup_bycounty
%>%
takeup_bycounty select(NAME, policies_in, exp_in, policies_out, exp_out, takeup_obs, takeup_exp) %>%
%>%
gt fmt_markdown(NAME) %>%
fmt_number(c(ends_with('in'), ends_with('out')), decimals = 0) %>%
fmt_percent(starts_with('takeup')) %>%
tab_spanner(label = html('Policyholders<br>Within Floodplain'), columns = ends_with('in')) %>%
tab_spanner(label = html('Policyholders<br>Outside Floodplain'), columns = ends_with('out')) %>%
tab_spanner(label = html('County-Level<br>Takeup Rate'), columns = starts_with('takeup')) %>%
cols_label(
NAME = 'County',
policies_in = 'Observed',
exp_in = 'Expected',
policies_out = 'Observed',
exp_out = 'Expected',
takeup_obs = 'Observed',
takeup_exp = 'Expected'
%>%
) tab_header('Observed vs. Expected County-Level NFIP Behavior') %>%
tab_options(
heading.background.color = '#d9d9d9',
column_labels.background.color = '#f2f2f2')
## # A tibble: 1 x 6
## hu_in exp_in policies_in hu_out exp_out policies_out
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 786071 99328 99326 13542468 93188 93188
Observed vs. Expected County-Level NFIP Behavior | ||||||
---|---|---|---|---|---|---|
County |
Policyholders Within Floodplain |
Policyholders Outside Floodplain |
County-Level Takeup Rate |
|||
Observed | Expected | Observed | Expected | Observed | Expected | |
Alameda |
1,543 | 3,469 | 2,705 | 4,057 | 0.69% | 1.22% |
Alpine |
0 | 0 | 3 | 11 | 0.18% | 0.68% |
Amador |
123 | 101 | 37 | 123 | 0.86% | 1.20% |
Butte |
947 | 1,169 | 914 | 590 | 1.96% | 1.85% |
Calaveras |
34 | 158 | 112 | 180 | 0.53% | 1.23% |
Colusa |
175 | 239 | 234 | 43 | 5.06% | 3.49% |
Contra Costa |
1,942 | 3,420 | 1,378 | 2,712 | 0.79% | 1.46% |
Del Norte |
80 | 165 | 64 | 67 | 1.30% | 2.09% |
El Dorado |
92 | 513 | 248 | 612 | 0.37% | 1.21% |
Fresno |
719 | 1,352 | 626 | 2,242 | 0.40% | 1.07% |
Glenn |
285 | 198 | 123 | 64 | 3.75% | 2.41% |
Humboldt |
346 | 1,202 | 215 | 362 | 0.90% | 2.52% |
Imperial |
33 | 524 | 19 | 361 | 0.09% | 1.56% |
Inyo |
5 | 36 | 40 | 63 | 0.48% | 1.05% |
Kern |
3,647 | 2,315 | 889 | 1,937 | 1.51% | 1.42% |
Kings |
125 | 316 | 113 | 300 | 0.52% | 1.33% |
Lake |
1,231 | 1,200 | 134 | 171 | 3.98% | 4.00% |
Lassen |
42 | 160 | 25 | 76 | 0.55% | 1.92% |
Los Angeles |
4,178 | 7,295 | 11,911 | 24,229 | 0.45% | 0.88% |
Madera |
531 | 484 | 95 | 314 | 1.26% | 1.61% |
Marin |
3,089 | 2,570 | 2,660 | 628 | 5.15% | 2.87% |
Mariposa |
0 | 22 | 0 | 66 | 0.00% | 0.90% |
Mendocino |
496 | 564 | 164 | 253 | 1.60% | 1.98% |
Merced |
4,583 | 2,591 | 317 | 456 | 5.65% | 3.51% |
Modoc |
23 | 89 | 55 | 28 | 1.62% | 2.43% |
Mono |
15 | 42 | 88 | 91 | 0.76% | 0.98% |
Monterey |
1,052 | 1,321 | 770 | 913 | 1.27% | 1.56% |
Napa |
813 | 786 | 494 | 339 | 2.36% | 2.03% |
Nevada |
53 | 236 | 104 | 355 | 0.29% | 1.11% |
Orange |
4,394 | 8,481 | 6,619 | 7,257 | 0.98% | 1.40% |
Placer |
335 | 1,136 | 820 | 1,110 | 0.68% | 1.32% |
Plumas |
79 | 121 | 53 | 100 | 0.86% | 1.43% |
Riverside |
3,047 | 6,053 | 2,251 | 5,481 | 0.63% | 1.37% |
Sacramento |
29,866 | 8,999 | 20,064 | 3,528 | 8.55% | 2.15% |
San Benito |
104 | 138 | 55 | 130 | 0.80% | 1.34% |
San Bernardino |
2,098 | 3,536 | 1,104 | 4,822 | 0.44% | 1.15% |
San Diego |
3,438 | 6,602 | 5,110 | 8,047 | 0.70% | 1.20% |
San Francisco |
5 | 4 | 170 | 2,785 | 0.04% | 0.69% |
San Joaquin |
1,572 | 2,324 | 4,475 | 1,587 | 2.43% | 1.57% |
San Luis Obispo |
983 | 1,745 | 851 | 752 | 1.49% | 2.03% |
San Mateo |
1,989 | 3,083 | 2,763 | 1,777 | 1.68% | 1.72% |
Santa Barbara |
1,565 | 1,773 | 1,940 | 989 | 2.22% | 1.75% |
Santa Clara |
9,956 | 7,257 | 1,949 | 4,308 | 1.74% | 1.69% |
Santa Cruz |
2,010 | 1,710 | 724 | 637 | 2.58% | 2.21% |
Shasta |
474 | 824 | 417 | 502 | 1.12% | 1.67% |
Sierra |
55 | 7 | 15 | 14 | 3.33% | 1.00% |
Siskiyou |
179 | 138 | 103 | 151 | 1.22% | 1.25% |
Solano |
995 | 1,890 | 1,258 | 1,007 | 1.40% | 1.80% |
Sonoma |
1,894 | 1,220 | 1,204 | 1,345 | 1.51% | 1.25% |
Stanislaus |
545 | 880 | 333 | 1,209 | 0.48% | 1.14% |
Sutter |
352 | 316 | 4,673 | 220 | 14.58% | 1.56% |
Tehama |
683 | 655 | 247 | 153 | 3.40% | 2.95% |
Trinity |
31 | 12 | 36 | 56 | 0.81% | 0.83% |
Tulare |
2,790 | 3,075 | 2,694 | 864 | 3.66% | 2.63% |
Tuolumne |
7 | 75 | 31 | 212 | 0.12% | 0.91% |
Ventura |
2,874 | 2,959 | 3,249 | 1,850 | 2.10% | 1.65% |
Yolo |
619 | 1,151 | 3,072 | 486 | 4.63% | 2.05% |
Yuba |
185 | 627 | 2,371 | 166 | 8.77% | 2.72% |
<- ggplot(takeup_bycounty) +
g1 geom_ribbon(
data = data.frame(x = (0:(max(takeup_bycounty$takeup_exp+0.01)*1e4))/1e4) %>%
mutate(lower = positive(x-0.01), upper = x+0.01),
aes(x = x, ymin = lower, ymax = upper), fill = 'grey70', alpha = 0.25) +
geom_point(aes(x = takeup_exp, y = takeup_obs), size = 1) +
geom_text(
data = takeup_bycounty %>% filter(abs(takeup_exp-takeup_obs) > 0.03),
aes(x = takeup_exp, y = takeup_obs, label = NAME),
family = 'Segoe UI', size = 7/.pt, color = 'grey40',
nudge_x = 0, nudge_y = -0.005) +
scale_x_origin(labels = percent) + scale_y_origin(labels = percent) +
geom_parity() + coord_cartesian(xlim = range(takeup_bycounty$takeup_exp)) +
labs(x = 'Expected Takeup Rate', y = 'Observed Takeup Rate') +
theme(panel.grid.major = element_line(size = 0.25))
<- ggplot(takeup_bycounty) +
g2 geom_ribbon(
data = data.frame(x = 10*(1:(max(takeup_bycounty$exp)/10+1e3))) %>%
mutate(half = x/2, double = x*2),
aes(x = x, ymin = half, ymax = double), fill = 'grey70', alpha = 0.25) +
geom_text(
data = takeup_bycounty %>% filter(policies/exp > 5),
aes(x = exp, y = policies*1.45, label = NAME),
family = 'Segoe UI', size = 7/.pt, color = 'grey40',
nudge_x = 0, nudge_y = 0) +
geom_text(
data = takeup_bycounty %>% filter(exp/policies > 5),
aes(x = exp, y = (policies+c(0,1.5,0,0))/1.35, label = NAME),
family = 'Segoe UI', size = 7/.pt, color = 'grey40',
nudge_x = c(0.15,0,0,0,0), nudge_y = c(0,0,0,0,0)) +
geom_point(aes(x = exp, y = policies), size = 1) +
scale_x_log10(labels = comma) + scale_y_log10(labels = comma) +
annotation_logticks(sides = 'bl', size = 0.25, color = 'grey25') +
geom_parity() + coord_fixed(clip = 'off', xlim = range(takeup_bycounty$exp)) +
labs(x = 'Expected Number of Policies', y = 'Observed Number of Policies') +
theme(panel.grid.major = element_line(size = 0.25))
+ g1 +
g2 plot_layout(widths = c(1,1)) +
plot_annotation(
# title = 'County-Level Takeup Rates',
tag_levels = 'a', tag_prefix = '(', tag_suffix = ')') &
theme(
plot.tag = element_text(size = 10, face = 'bold'),
plot.tag.position = c(0,1))
ggsave('_figures/fig6_takeup.png', width = 6, height = 3.5)