Setup

Load functions & packages

source('_data/setup.R')

suppressPackageStartupMessages({
  require(DMwR)
  require(themis)
})

Load data

load('_data/_checkpoints/county_catalog_0705.Rdata')
load('_results/county_rfmodel_1981.Rdata')
load('_data/_checkpoints/bg_0417.Rdata')
load('_data/policies_save.Rdata')

polys <- california %>% 
  arrange(NAME) %>%  
  transmute(id = 1:nrow(.), name = NAME, geometry)

Clean up 2021 policies & aggregate by county

policies.poly <- policies %>%
  transmute(
    floodZone,
    county = str_sub(countyCode, start = 3, end = 5),
    tract = str_sub(censusTract, start = 6),
    policy_start = ymd_hms(policyEffectiveDate),
    policy_end = ymd_hms(policyTerminationDate)) %>%
  filter(complete.cases(.)) %>%
  filter(ymd('2021-10-01') %within% interval(policy_start, policy_end)) %>%
  left_join(california %>% st_drop_geometry %>% select(COUNTYFP,NAME), by = c('county' = 'COUNTYFP')) %>%
  left_join(polys %>% st_drop_geometry %>% select(id,name), by = c('NAME' = 'name')) %>%
  filter(!is.na(id)) %>% 
  mutate(NFHL = grepl('A',floodZone) | grepl('V',floodZone))

Census Tract

Check methodology at the census tract level

## calculate observed 2021 housing units by tract
hu_bytract <-   
  bg %>% st_drop_geometry %>% 
  transmute(county = COUNTYFP, tract = TRACTCE, pctarea = partarea/area, hu) %>% 
  mutate(hu_in = hu*pctarea, hu_out = hu*(1-pctarea)) %>% 
  group_by(county, tract) %>% 
  summarize(
    hu = Sum(hu), hu_in = round(sum(hu_in)), hu_out = round(sum(hu_out)), 
    .groups = 'drop')

## calculate observed 2021 policies by tract
policies_bytract <- policies.poly %>%
  count(county, tract, NFHL) %>% 
  mutate(temp = case_when(NFHL ~ 'policies_in', TRUE ~ 'policies_out')) %>% 
  select(-NFHL) %>% 
  pivot_wider(id_cols = c(county,tract), names_from = temp, values_from = n) %>% 
  mutate(across(starts_with('policies'), ~setNA(.,0)))

## calculate statewide observed NFIP takeup rates within & outside of the FEMA 100-year floodplain
takeup_in <- Sum(policies_bytract$policies_in) / Sum(hu_bytract$hu_in)
takeup_out <- Sum(policies_bytract$policies_out) / Sum(hu_bytract$hu_out)

## gut check: make sure we can reverse-engineer the correct number of observed policies

## total number of policies
bg %>% st_drop_geometry %>% 
  transmute(county = COUNTYFP, tract = TRACTCE, pctarea = partarea/area, hu) %>% 
  mutate(
    hu_in = hu*pctarea, hu_out = hu*(1-pctarea),
    exp_in = takeup_in*hu_in, exp_out = takeup_out*hu_out) %>% 
  summarize(exp_in = sum(exp_in), exp_out = sum(exp_out)) %>% sum
Sum(policies_bytract$policies_in) + Sum(policies_bytract$policies_out)

## policies in & outside of the floodplain
takeup_bytract <- bg %>% st_drop_geometry %>%
  transmute(county = COUNTYFP, tract = TRACTCE, pctarea = partarea/area, hu) %>% 
  mutate(
    hu_in = hu*pctarea, hu_out = hu*(1-pctarea),
    exp_in = takeup_in*hu_in, exp_out = takeup_out*hu_out) %>% 
  group_by(county, tract) %>% 
  summarize(
    hu = Sum(hu), hu_in = round(sum(hu_in)), hu_out = hu-hu_in,
    exp_in = round(sum(exp_in)), exp_out = round(sum(exp_out)), .groups = 'drop') %>% 
  full_join(policies_bytract, by = c('county', 'tract')) %>% 
  mutate(across(c(ends_with('in'), ends_with('out')), ~setNA(.,0))) 
takeup_bytract %>% summarize(across(c(ends_with('in'), ends_with('out')), Sum))

## within <1% for both checks -> move on

## [1] 192516.5
## [1] 192514
## # A tibble: 1 x 6
##    hu_in exp_in policies_in   hu_out exp_out policies_out
##    <dbl>  <dbl>       <dbl>    <dbl>   <dbl>        <dbl>
## 1 786049  99317       99326 13542490   93193        93188

County

Repeat calculations at the county level

## calculate observed 2021 housing units by county
hu_bycounty <- 
  bg %>% st_drop_geometry %>% 
  transmute(county = COUNTYFP, pctarea = partarea/area, hu) %>% 
  mutate(hu_in = hu*pctarea, hu_out = hu*(1-pctarea)) %>% 
  group_by(county) %>% 
  summarize(
    hu = Sum(hu), hu_in = round(sum(hu_in)), hu_out = round(sum(hu_out)), 
    .groups = 'drop')

## calculate observed 2021 policies by tract
policies_bycounty <- 
  policies.poly %>%
  count(county, NFHL) %>% 
  mutate(temp = case_when(NFHL ~ 'policies_in', TRUE ~ 'policies_out')) %>% 
  select(-NFHL) %>% 
  pivot_wider(id_cols = county, names_from = temp, values_from = n) %>% 
  mutate(across(starts_with('policies'), ~setNA(.,0)))

## calculate statewide observed NFIP takeup rates within & outside of the FEMA 100-year floodplain
takeup_in <- Sum(policies_bycounty$policies_in) / Sum(hu_bycounty$hu_in)
takeup_out <- Sum(policies_bycounty$policies_out) / Sum(hu_bycounty$hu_out)

## gut check
(Sum(policies_bycounty$policies_in)+Sum(policies_bycounty$policies_out)) /
  (Sum(hu_bycounty$hu_in)+Sum(hu_bycounty$hu_out))
nrow(policies.poly) / Sum(bg$hu)

## [1] 0.0134357
## [1] 0.0134357

takeup_bycounty <- 
  bg %>% st_drop_geometry %>%
  transmute(county = COUNTYFP, pctarea = partarea/area, hu) %>% 
  mutate(
    hu_in = hu*pctarea, hu_out = hu*(1-pctarea),
    exp_in = takeup_in*hu_in, exp_out = takeup_out*hu_out) %>% 
  group_by(county) %>% 
  summarize(
    hu = Sum(hu), hu_in = round(Sum(hu_in)), hu_out = hu-hu_in,
    exp_in = round(Sum(exp_in)), exp_out = round(Sum(exp_out)), 
    .groups = 'drop') %>% 
  full_join(policies_bycounty, by = 'county') %>% 
  mutate(across(c(ends_with('in'), ends_with('out')), ~setNA(.,0))) %>% 
  left_join(
    california %>% st_drop_geometry %>% select(NAME,COUNTYFP), 
    by = c('county' = 'COUNTYFP')) %>% 
  mutate(hu = hu_in+hu_out, policies = policies_in+policies_out, exp = exp_in+exp_out) %>%
  mutate(takeup_obs = policies/hu, takeup_exp = exp/hu)
takeup_bycounty %>% summarize(across(c(ends_with('in'), ends_with('out')), Sum))

takeup_bycounty %>% 
  select(NAME, policies_in, exp_in, policies_out, exp_out, takeup_obs, takeup_exp) %>% 
  gt %>% 
  fmt_markdown(NAME) %>% 
  fmt_number(c(ends_with('in'), ends_with('out')), decimals = 0) %>% 
  fmt_percent(starts_with('takeup')) %>% 
  tab_spanner(label = html('Policyholders<br>Within Floodplain'), columns = ends_with('in')) %>% 
  tab_spanner(label = html('Policyholders<br>Outside Floodplain'), columns = ends_with('out')) %>% 
  tab_spanner(label = html('County-Level<br>Takeup Rate'), columns = starts_with('takeup')) %>% 
  cols_label(
    NAME = 'County',
    policies_in = 'Observed',
    exp_in = 'Expected',
    policies_out = 'Observed',
    exp_out = 'Expected',
    takeup_obs = 'Observed',
    takeup_exp = 'Expected'
  ) %>% 
  tab_header('Observed vs. Expected County-Level NFIP Behavior') %>% 
  tab_options(
    heading.background.color = '#d9d9d9', 
    column_labels.background.color = '#f2f2f2')

## # A tibble: 1 x 6
##    hu_in exp_in policies_in   hu_out exp_out policies_out
##    <dbl>  <dbl>       <dbl>    <dbl>   <dbl>        <dbl>
## 1 786071  99328       99326 13542468   93188        93188

Observed vs. Expected County-Level NFIP Behavior
County	Policyholders Within Floodplain		Policyholders Outside Floodplain		County-Level Takeup Rate
County	Observed	Expected	Observed	Expected	Observed	Expected
Alameda	1,543	3,469	2,705	4,057	0.69%	1.22%
Alpine	0	0	3	11	0.18%	0.68%
Amador	123	101	37	123	0.86%	1.20%
Butte	947	1,169	914	590	1.96%	1.85%
Calaveras	34	158	112	180	0.53%	1.23%
Colusa	175	239	234	43	5.06%	3.49%
Contra Costa	1,942	3,420	1,378	2,712	0.79%	1.46%
Del Norte	80	165	64	67	1.30%	2.09%
El Dorado	92	513	248	612	0.37%	1.21%
Fresno	719	1,352	626	2,242	0.40%	1.07%
Glenn	285	198	123	64	3.75%	2.41%
Humboldt	346	1,202	215	362	0.90%	2.52%
Imperial	33	524	19	361	0.09%	1.56%
Inyo	5	36	40	63	0.48%	1.05%
Kern	3,647	2,315	889	1,937	1.51%	1.42%
Kings	125	316	113	300	0.52%	1.33%
Lake	1,231	1,200	134	171	3.98%	4.00%
Lassen	42	160	25	76	0.55%	1.92%
Los Angeles	4,178	7,295	11,911	24,229	0.45%	0.88%
Madera	531	484	95	314	1.26%	1.61%
Marin	3,089	2,570	2,660	628	5.15%	2.87%
Mariposa	0	22	0	66	0.00%	0.90%
Mendocino	496	564	164	253	1.60%	1.98%
Merced	4,583	2,591	317	456	5.65%	3.51%
Modoc	23	89	55	28	1.62%	2.43%
Mono	15	42	88	91	0.76%	0.98%
Monterey	1,052	1,321	770	913	1.27%	1.56%
Napa	813	786	494	339	2.36%	2.03%
Nevada	53	236	104	355	0.29%	1.11%
Orange	4,394	8,481	6,619	7,257	0.98%	1.40%
Placer	335	1,136	820	1,110	0.68%	1.32%
Plumas	79	121	53	100	0.86%	1.43%
Riverside	3,047	6,053	2,251	5,481	0.63%	1.37%
Sacramento	29,866	8,999	20,064	3,528	8.55%	2.15%
San Benito	104	138	55	130	0.80%	1.34%
San Bernardino	2,098	3,536	1,104	4,822	0.44%	1.15%
San Diego	3,438	6,602	5,110	8,047	0.70%	1.20%
San Francisco	5	4	170	2,785	0.04%	0.69%
San Joaquin	1,572	2,324	4,475	1,587	2.43%	1.57%
San Luis Obispo	983	1,745	851	752	1.49%	2.03%
San Mateo	1,989	3,083	2,763	1,777	1.68%	1.72%
Santa Barbara	1,565	1,773	1,940	989	2.22%	1.75%
Santa Clara	9,956	7,257	1,949	4,308	1.74%	1.69%
Santa Cruz	2,010	1,710	724	637	2.58%	2.21%
Shasta	474	824	417	502	1.12%	1.67%
Sierra	55	7	15	14	3.33%	1.00%
Siskiyou	179	138	103	151	1.22%	1.25%
Solano	995	1,890	1,258	1,007	1.40%	1.80%
Sonoma	1,894	1,220	1,204	1,345	1.51%	1.25%
Stanislaus	545	880	333	1,209	0.48%	1.14%
Sutter	352	316	4,673	220	14.58%	1.56%
Tehama	683	655	247	153	3.40%	2.95%
Trinity	31	12	36	56	0.81%	0.83%
Tulare	2,790	3,075	2,694	864	3.66%	2.63%
Tuolumne	7	75	31	212	0.12%	0.91%
Ventura	2,874	2,959	3,249	1,850	2.10%	1.65%
Yolo	619	1,151	3,072	486	4.63%	2.05%
Yuba	185	627	2,371	166	8.77%	2.72%

g1 <- ggplot(takeup_bycounty) + 
  geom_ribbon(
    data = data.frame(x = (0:(max(takeup_bycounty$takeup_exp+0.01)*1e4))/1e4) %>% 
      mutate(lower = positive(x-0.01), upper = x+0.01),
    aes(x = x, ymin = lower, ymax = upper), fill = 'grey70', alpha = 0.25) + 
  geom_point(aes(x = takeup_exp, y = takeup_obs), size = 1) + 
  geom_text(
    data = takeup_bycounty %>% filter(abs(takeup_exp-takeup_obs) > 0.03),
    aes(x = takeup_exp, y = takeup_obs, label = NAME),
    family = 'Segoe UI', size = 7/.pt, color = 'grey40', 
    nudge_x = 0, nudge_y = -0.005) + 
  scale_x_origin(labels = percent) + scale_y_origin(labels = percent) +
  geom_parity() + coord_cartesian(xlim = range(takeup_bycounty$takeup_exp)) + 
  labs(x = 'Expected Takeup Rate', y = 'Observed Takeup Rate') + 
  theme(panel.grid.major = element_line(size = 0.25))
g2 <- ggplot(takeup_bycounty) + 
  geom_ribbon(
    data = data.frame(x = 10*(1:(max(takeup_bycounty$exp)/10+1e3))) %>% 
      mutate(half = x/2, double = x*2),
    aes(x = x, ymin = half, ymax = double), fill = 'grey70', alpha = 0.25) + 
  geom_text(
    data = takeup_bycounty %>% filter(policies/exp > 5),
    aes(x = exp, y = policies*1.45, label = NAME),
    family = 'Segoe UI', size = 7/.pt, color = 'grey40', 
    nudge_x = 0, nudge_y = 0) + 
  geom_text(
    data = takeup_bycounty %>% filter(exp/policies > 5),
    aes(x = exp, y = (policies+c(0,1.5,0,0))/1.35, label = NAME),
    family = 'Segoe UI', size = 7/.pt, color = 'grey40', 
    nudge_x = c(0.15,0,0,0,0), nudge_y = c(0,0,0,0,0)) + 
  geom_point(aes(x = exp, y = policies), size = 1) + 
  scale_x_log10(labels = comma) + scale_y_log10(labels = comma) + 
  annotation_logticks(sides = 'bl', size = 0.25, color = 'grey25') + 
  geom_parity() + coord_fixed(clip = 'off', xlim = range(takeup_bycounty$exp)) + 
  labs(x = 'Expected Number of Policies', y = 'Observed Number of Policies') + 
  theme(panel.grid.major = element_line(size = 0.25))
g2 + g1 + 
  plot_layout(widths = c(1,1)) + 
  plot_annotation(
    # title = 'County-Level Takeup Rates',
    tag_levels = 'a', tag_prefix = '(', tag_suffix = ')') &
  theme(
    plot.tag = element_text(size = 10, face = 'bold'),
    plot.tag.position = c(0,1))
ggsave('_figures/fig6_takeup.png', width = 6, height = 3.5)

Observed vs. Expected NFIP Takeup Behavior

Corinne Bowers

2023-07-19

Setup

Load functions & packages

Load data

Clean up 2021 policies & aggregate by county

Census Tract

Check methodology at the census tract level

County

Repeat calculations at the county level