Setup

Load functions & packages

source('_data/setup.R')

suppressPackageStartupMessages({
  require(DMwR)
  require(themis)
})

Load data

load('_data/_checkpoints/county_catalog_0705.Rdata')
load('_results/county_rfmodel_1981.Rdata')
load('_data/_checkpoints/bg_0417.Rdata')
load('_data/policies_save.Rdata')

polys <- california %>% 
  arrange(NAME) %>%  
  transmute(id = 1:nrow(.), name = NAME, geometry) 

Clean up 2021 policies & aggregate by county

policies.poly <- policies %>%
  transmute(
    floodZone,
    county = str_sub(countyCode, start = 3, end = 5),
    tract = str_sub(censusTract, start = 6),
    policy_start = ymd_hms(policyEffectiveDate),
    policy_end = ymd_hms(policyTerminationDate)) %>%
  filter(complete.cases(.)) %>%
  filter(ymd('2021-10-01') %within% interval(policy_start, policy_end)) %>%
  left_join(california %>% st_drop_geometry %>% select(COUNTYFP,NAME), by = c('county' = 'COUNTYFP')) %>%
  left_join(polys %>% st_drop_geometry %>% select(id,name), by = c('NAME' = 'name')) %>%
  filter(!is.na(id)) %>% 
  mutate(NFHL = grepl('A',floodZone) | grepl('V',floodZone))

Census Tract

Check methodology at the census tract level

## calculate observed 2021 housing units by tract
hu_bytract <-   
  bg %>% st_drop_geometry %>% 
  transmute(county = COUNTYFP, tract = TRACTCE, pctarea = partarea/area, hu) %>% 
  mutate(hu_in = hu*pctarea, hu_out = hu*(1-pctarea)) %>% 
  group_by(county, tract) %>% 
  summarize(
    hu = Sum(hu), hu_in = round(sum(hu_in)), hu_out = round(sum(hu_out)), 
    .groups = 'drop')

## calculate observed 2021 policies by tract
policies_bytract <- policies.poly %>%
  count(county, tract, NFHL) %>% 
  mutate(temp = case_when(NFHL ~ 'policies_in', TRUE ~ 'policies_out')) %>% 
  select(-NFHL) %>% 
  pivot_wider(id_cols = c(county,tract), names_from = temp, values_from = n) %>% 
  mutate(across(starts_with('policies'), ~setNA(.,0)))

## calculate statewide observed NFIP takeup rates within & outside of the FEMA 100-year floodplain
takeup_in <- Sum(policies_bytract$policies_in) / Sum(hu_bytract$hu_in)
takeup_out <- Sum(policies_bytract$policies_out) / Sum(hu_bytract$hu_out)
## gut check: make sure we can reverse-engineer the correct number of observed policies

## total number of policies
bg %>% st_drop_geometry %>% 
  transmute(county = COUNTYFP, tract = TRACTCE, pctarea = partarea/area, hu) %>% 
  mutate(
    hu_in = hu*pctarea, hu_out = hu*(1-pctarea),
    exp_in = takeup_in*hu_in, exp_out = takeup_out*hu_out) %>% 
  summarize(exp_in = sum(exp_in), exp_out = sum(exp_out)) %>% sum
Sum(policies_bytract$policies_in) + Sum(policies_bytract$policies_out)

## policies in & outside of the floodplain
takeup_bytract <- bg %>% st_drop_geometry %>%
  transmute(county = COUNTYFP, tract = TRACTCE, pctarea = partarea/area, hu) %>% 
  mutate(
    hu_in = hu*pctarea, hu_out = hu*(1-pctarea),
    exp_in = takeup_in*hu_in, exp_out = takeup_out*hu_out) %>% 
  group_by(county, tract) %>% 
  summarize(
    hu = Sum(hu), hu_in = round(sum(hu_in)), hu_out = hu-hu_in,
    exp_in = round(sum(exp_in)), exp_out = round(sum(exp_out)), .groups = 'drop') %>% 
  full_join(policies_bytract, by = c('county', 'tract')) %>% 
  mutate(across(c(ends_with('in'), ends_with('out')), ~setNA(.,0))) 
takeup_bytract %>% summarize(across(c(ends_with('in'), ends_with('out')), Sum))

## within <1% for both checks -> move on
## [1] 192516.5
## [1] 192514
## # A tibble: 1 x 6
##    hu_in exp_in policies_in   hu_out exp_out policies_out
##    <dbl>  <dbl>       <dbl>    <dbl>   <dbl>        <dbl>
## 1 786049  99317       99326 13542490   93193        93188

County

Repeat calculations at the county level

## calculate observed 2021 housing units by county
hu_bycounty <- 
  bg %>% st_drop_geometry %>% 
  transmute(county = COUNTYFP, pctarea = partarea/area, hu) %>% 
  mutate(hu_in = hu*pctarea, hu_out = hu*(1-pctarea)) %>% 
  group_by(county) %>% 
  summarize(
    hu = Sum(hu), hu_in = round(sum(hu_in)), hu_out = round(sum(hu_out)), 
    .groups = 'drop')

## calculate observed 2021 policies by tract
policies_bycounty <- 
  policies.poly %>%
  count(county, NFHL) %>% 
  mutate(temp = case_when(NFHL ~ 'policies_in', TRUE ~ 'policies_out')) %>% 
  select(-NFHL) %>% 
  pivot_wider(id_cols = county, names_from = temp, values_from = n) %>% 
  mutate(across(starts_with('policies'), ~setNA(.,0)))

## calculate statewide observed NFIP takeup rates within & outside of the FEMA 100-year floodplain
takeup_in <- Sum(policies_bycounty$policies_in) / Sum(hu_bycounty$hu_in)
takeup_out <- Sum(policies_bycounty$policies_out) / Sum(hu_bycounty$hu_out)
## gut check
(Sum(policies_bycounty$policies_in)+Sum(policies_bycounty$policies_out)) /
  (Sum(hu_bycounty$hu_in)+Sum(hu_bycounty$hu_out))
nrow(policies.poly) / Sum(bg$hu)
## [1] 0.0134357
## [1] 0.0134357
takeup_bycounty <- 
  bg %>% st_drop_geometry %>%
  transmute(county = COUNTYFP, pctarea = partarea/area, hu) %>% 
  mutate(
    hu_in = hu*pctarea, hu_out = hu*(1-pctarea),
    exp_in = takeup_in*hu_in, exp_out = takeup_out*hu_out) %>% 
  group_by(county) %>% 
  summarize(
    hu = Sum(hu), hu_in = round(Sum(hu_in)), hu_out = hu-hu_in,
    exp_in = round(Sum(exp_in)), exp_out = round(Sum(exp_out)), 
    .groups = 'drop') %>% 
  full_join(policies_bycounty, by = 'county') %>% 
  mutate(across(c(ends_with('in'), ends_with('out')), ~setNA(.,0))) %>% 
  left_join(
    california %>% st_drop_geometry %>% select(NAME,COUNTYFP), 
    by = c('county' = 'COUNTYFP')) %>% 
  mutate(hu = hu_in+hu_out, policies = policies_in+policies_out, exp = exp_in+exp_out) %>%
  mutate(takeup_obs = policies/hu, takeup_exp = exp/hu)
takeup_bycounty %>% summarize(across(c(ends_with('in'), ends_with('out')), Sum))

takeup_bycounty %>% 
  select(NAME, policies_in, exp_in, policies_out, exp_out, takeup_obs, takeup_exp) %>% 
  gt %>% 
  fmt_markdown(NAME) %>% 
  fmt_number(c(ends_with('in'), ends_with('out')), decimals = 0) %>% 
  fmt_percent(starts_with('takeup')) %>% 
  tab_spanner(label = html('Policyholders<br>Within Floodplain'), columns = ends_with('in')) %>% 
  tab_spanner(label = html('Policyholders<br>Outside Floodplain'), columns = ends_with('out')) %>% 
  tab_spanner(label = html('County-Level<br>Takeup Rate'), columns = starts_with('takeup')) %>% 
  cols_label(
    NAME = 'County',
    policies_in = 'Observed',
    exp_in = 'Expected',
    policies_out = 'Observed',
    exp_out = 'Expected',
    takeup_obs = 'Observed',
    takeup_exp = 'Expected'
  ) %>% 
  tab_header('Observed vs. Expected County-Level NFIP Behavior') %>% 
  tab_options(
    heading.background.color = '#d9d9d9', 
    column_labels.background.color = '#f2f2f2')
## # A tibble: 1 x 6
##    hu_in exp_in policies_in   hu_out exp_out policies_out
##    <dbl>  <dbl>       <dbl>    <dbl>   <dbl>        <dbl>
## 1 786071  99328       99326 13542468   93188        93188
Observed vs. Expected County-Level NFIP Behavior
County Policyholders
Within Floodplain
Policyholders
Outside Floodplain
County-Level
Takeup Rate
Observed Expected Observed Expected Observed Expected

Alameda

1,543 3,469 2,705 4,057 0.69% 1.22%

Alpine

0 0 3 11 0.18% 0.68%

Amador

123 101 37 123 0.86% 1.20%

Butte

947 1,169 914 590 1.96% 1.85%

Calaveras

34 158 112 180 0.53% 1.23%

Colusa

175 239 234 43 5.06% 3.49%

Contra Costa

1,942 3,420 1,378 2,712 0.79% 1.46%

Del Norte

80 165 64 67 1.30% 2.09%

El Dorado

92 513 248 612 0.37% 1.21%

Fresno

719 1,352 626 2,242 0.40% 1.07%

Glenn

285 198 123 64 3.75% 2.41%

Humboldt

346 1,202 215 362 0.90% 2.52%

Imperial

33 524 19 361 0.09% 1.56%

Inyo

5 36 40 63 0.48% 1.05%

Kern

3,647 2,315 889 1,937 1.51% 1.42%

Kings

125 316 113 300 0.52% 1.33%

Lake

1,231 1,200 134 171 3.98% 4.00%

Lassen

42 160 25 76 0.55% 1.92%

Los Angeles

4,178 7,295 11,911 24,229 0.45% 0.88%

Madera

531 484 95 314 1.26% 1.61%

Marin

3,089 2,570 2,660 628 5.15% 2.87%

Mariposa

0 22 0 66 0.00% 0.90%

Mendocino

496 564 164 253 1.60% 1.98%

Merced

4,583 2,591 317 456 5.65% 3.51%

Modoc

23 89 55 28 1.62% 2.43%

Mono

15 42 88 91 0.76% 0.98%

Monterey

1,052 1,321 770 913 1.27% 1.56%

Napa

813 786 494 339 2.36% 2.03%

Nevada

53 236 104 355 0.29% 1.11%

Orange

4,394 8,481 6,619 7,257 0.98% 1.40%

Placer

335 1,136 820 1,110 0.68% 1.32%

Plumas

79 121 53 100 0.86% 1.43%

Riverside

3,047 6,053 2,251 5,481 0.63% 1.37%

Sacramento

29,866 8,999 20,064 3,528 8.55% 2.15%

San Benito

104 138 55 130 0.80% 1.34%

San Bernardino

2,098 3,536 1,104 4,822 0.44% 1.15%

San Diego

3,438 6,602 5,110 8,047 0.70% 1.20%

San Francisco

5 4 170 2,785 0.04% 0.69%

San Joaquin

1,572 2,324 4,475 1,587 2.43% 1.57%

San Luis Obispo

983 1,745 851 752 1.49% 2.03%

San Mateo

1,989 3,083 2,763 1,777 1.68% 1.72%

Santa Barbara

1,565 1,773 1,940 989 2.22% 1.75%

Santa Clara

9,956 7,257 1,949 4,308 1.74% 1.69%

Santa Cruz

2,010 1,710 724 637 2.58% 2.21%

Shasta

474 824 417 502 1.12% 1.67%

Sierra

55 7 15 14 3.33% 1.00%

Siskiyou

179 138 103 151 1.22% 1.25%

Solano

995 1,890 1,258 1,007 1.40% 1.80%

Sonoma

1,894 1,220 1,204 1,345 1.51% 1.25%

Stanislaus

545 880 333 1,209 0.48% 1.14%

Sutter

352 316 4,673 220 14.58% 1.56%

Tehama

683 655 247 153 3.40% 2.95%

Trinity

31 12 36 56 0.81% 0.83%

Tulare

2,790 3,075 2,694 864 3.66% 2.63%

Tuolumne

7 75 31 212 0.12% 0.91%

Ventura

2,874 2,959 3,249 1,850 2.10% 1.65%

Yolo

619 1,151 3,072 486 4.63% 2.05%

Yuba

185 627 2,371 166 8.77% 2.72%
g1 <- ggplot(takeup_bycounty) + 
  geom_ribbon(
    data = data.frame(x = (0:(max(takeup_bycounty$takeup_exp+0.01)*1e4))/1e4) %>% 
      mutate(lower = positive(x-0.01), upper = x+0.01),
    aes(x = x, ymin = lower, ymax = upper), fill = 'grey70', alpha = 0.25) + 
  geom_point(aes(x = takeup_exp, y = takeup_obs), size = 1) + 
  geom_text(
    data = takeup_bycounty %>% filter(abs(takeup_exp-takeup_obs) > 0.03),
    aes(x = takeup_exp, y = takeup_obs, label = NAME),
    family = 'Segoe UI', size = 7/.pt, color = 'grey40', 
    nudge_x = 0, nudge_y = -0.005) + 
  scale_x_origin(labels = percent) + scale_y_origin(labels = percent) +
  geom_parity() + coord_cartesian(xlim = range(takeup_bycounty$takeup_exp)) + 
  labs(x = 'Expected Takeup Rate', y = 'Observed Takeup Rate') + 
  theme(panel.grid.major = element_line(size = 0.25))
g2 <- ggplot(takeup_bycounty) + 
  geom_ribbon(
    data = data.frame(x = 10*(1:(max(takeup_bycounty$exp)/10+1e3))) %>% 
      mutate(half = x/2, double = x*2),
    aes(x = x, ymin = half, ymax = double), fill = 'grey70', alpha = 0.25) + 
  geom_text(
    data = takeup_bycounty %>% filter(policies/exp > 5),
    aes(x = exp, y = policies*1.45, label = NAME),
    family = 'Segoe UI', size = 7/.pt, color = 'grey40', 
    nudge_x = 0, nudge_y = 0) + 
  geom_text(
    data = takeup_bycounty %>% filter(exp/policies > 5),
    aes(x = exp, y = (policies+c(0,1.5,0,0))/1.35, label = NAME),
    family = 'Segoe UI', size = 7/.pt, color = 'grey40', 
    nudge_x = c(0.15,0,0,0,0), nudge_y = c(0,0,0,0,0)) + 
  geom_point(aes(x = exp, y = policies), size = 1) + 
  scale_x_log10(labels = comma) + scale_y_log10(labels = comma) + 
  annotation_logticks(sides = 'bl', size = 0.25, color = 'grey25') + 
  geom_parity() + coord_fixed(clip = 'off', xlim = range(takeup_bycounty$exp)) + 
  labs(x = 'Expected Number of Policies', y = 'Observed Number of Policies') + 
  theme(panel.grid.major = element_line(size = 0.25))
g2 + g1 + 
  plot_layout(widths = c(1,1)) + 
  plot_annotation(
    # title = 'County-Level Takeup Rates',
    tag_levels = 'a', tag_prefix = '(', tag_suffix = ')') &
  theme(
    plot.tag = element_text(size = 10, face = 'bold'),
    plot.tag.position = c(0,1))
ggsave('_figures/fig6_takeup.png', width = 6, height = 3.5)