2 Data collection

2.2 Survey of Income and Program Participation (SIPP) Datasets

2.2.2 Read SIPP 2014 Panel Data

# `labels.csv` generated by manually copying and cleaning `Table of Contents` of `SIPP Public Use Metadata Report`:
# `https://www2.census.gov/programs-surveys/sipp/data/datasets/2014/w4/2014SIPP_W4_Metadata_AllSections.pdf`
labels = read.csv2('labels.csv')

# Select fields that we are interested in
col_sel = c(
  'ssuid', # Sample unit identifier. Basically household ID
  'shhadid', # Household address ID
  'pnum', # Person number.
  'wpfinwgt', # Final person weight
  'erelrpe', # Need to calculate household weight
  'monthcode', 
  'swave',
  'eresidenceid',
  'esex',
  'tage',
  'tprval', # Property value of primary residence
  'tprloanamt', # Principal owed on the first three mortgages and loans against the primary residence
  'tirakeoval',# Value of IRA and KEOGH accounts
  'tthr401val', # Value of 401k, 403b, 503b, and Thrift Savings Plan accounts
  'elife_type', # Type of life insurance policy owned, 1 Term, 2 Whole, 3 Both
  'tlife_cval',
  'tlife_fval',
  'tlifeamt', # What was the total amount of life insurance payments ... received
  'tsur7amt',
  'tpothinc',
  'thtotinc',
  'totcmdpay',
  'tmdpay'
) %>% union(labels$label[labels$category == 'Asset_Amount'] %>% tolower) %>%
  setdiff(c('tmhloan_num', 'tbus_inv_num')) ## 'tmhloan_num' and 'tbus_inv_num' are not included in all waves (and we don't need them)

time = Sys.time()
wealth16.all <- read_stata('SIPP/pu2014w4.dta', col_select = all_of(col_sel)) %>% distinct
# Use distinct in case of duplicate records
time - Sys.time()

wealth15.all <- read_stata('SIPP/pu2014w3.dta', col_select = all_of(col_sel)) %>% distinct
wealth14.all <- read_stata('SIPP/pu2014w2.dta', col_select = all_of(col_sel)) %>% distinct
wealth13.all <- read_stata('SIPP/pu2014w1.dta', col_select = all_of(col_sel)) %>% distinct

all_wave <- rbind(wealth13.all, wealth14.all, wealth15.all, wealth16.all)

# We only need year-level data, so extract month 12
# https://www2.census.gov/programs-surveys/sipp/Select_approp_wgt_2014SIPPpanel.pdf
all_wave_annual <- all_wave %>% filter(monthcode == 12) %>% select(-monthcode)

# Save intermediary dataset so that we don't need to read all the data each time
save(all_wave_annual, file = 'all_wave_annual.rda')

colmname description
ssuid Sample unit identifier. This identifier is created by scrambling together PSU, S
swave Wave number of interview
erelrpe Household relationship (detailed categories)
wpfinwgt Final person weight
tirakeoval Value of IRA and KEOGH accounts as of the last day of the reference period.
tthr401val Value of 401k, 403b, 503b, and Thrift Savings Plan accounts as of the last day o
tlife_fval Face value of life insurance policies as of the last day of the reference period
tlife_cval Cash value of life insurance policies as of the last day of the reference period
tannval Value of equity in annuities as of the last day of the reference period.
tinc_bank Person-level sum of income earned over the reference period from interest-earnin
tval_bank Person-level sum of value of assets held at financial institutions (TJSICHKVAL,
tinc_stmf Person-level sum of income earned over the reference period from stocks and mutu
tval_stmf Person-level sum of value of stocks and mutual funds (TJSSTVAL, TJOSTVAL, TOSTVA
tinc_bond Person-level sum of income earned over the reference period from other interest-
tval_bond Person-level sum of value of other interest-earning assets (TJSGOVSVAL TJOGOVSVA
tinc_rent Person-level sum of net income from rental properties (TJSRPNETINC, TJORPNETINC,
tval_rent Person-level sum of value of rental properties (TJSRPVAL, TJORPVAL, TORPVAL).
teq_rent Person-level sum of equity in rental properties (TVAL_RENT, -TDEBT_RENT).
tval_re Person-level sum of value of other real estate (TJSREVAL, TJOREVAL, TOREVAL).
teq_re Person-level sum of equity in other real estate (TVAL_RE, -TDEBT_RE).
tinc_oth Person-level sum of income earned over the reference period from other assets (T
tval_oth Person-level sum of value of other assets (TOINVVAL, TANNVAL, TTRVAL, TLIFE_CVAL
tinc_ast Person-level sum of income earned over the reference period from all assets (TIN
tval_ret Person-level sum of value of retirement accounts (TTHR401VAL, TIRAKEOVAL).
tval_bus Person-level sum of value of businesses in which the person owns a share (TBSI(i
teq_bus Person-level sum of equity in businesses (TVAL_BUS, -TDEBT_BUS).
tval_home Person-level sum of value of primary residence (either TPROPVAL or TMHVAL) in wh
teq_home Person-level sum of equity in primary residence (TVAL_HOME -TDEBT_HOME)
tval_veh Person-level sum of value of all vehicles in which the person owns a share (as i
teq_veh Person-level sum of equity in vehicles (TVAL_VEH, -TDEBT_VEH).
tval_esav Person-level sum of value of educational savings accounts (TESAV(i)VAL for i=1,2
tval_rmu Person-level sum of rent, mortgage, and utility payments in December of the refe
tval_ast Person-level sum of all asset values (TVAL_BANK, TVAL_STMF, TVAL_BOND, TVAL_RENT
tnetworth Person-level net worth (TVAL_AST, -TDEBT_AST).

2.3 American Council of Life Insurers (ACLI) data

2.3.2 Join two ACLI data tables

2.4 National Association of Insurance Commissioners (NAIC) data

2.4.1 Read NAIC data from S&P Global Market Intelligence (2020)

# Data manually downloaded from `Financials, U.S. Life Statutory, NAIC Format`

# `Life Industry | LIFE ANALYSIS OF OPERATIONS BY LOB (PG. 6)`
NAICsurrender = read.csv2('surrender.csv') %>% as_tibble %>%
  filter(type %in% c('indl', 'ordinary', 'credit', 'group')) %>%
  gather(year, surrender_benefit, Y1996:Y2019) %>%
  mutate(surrender_benefit = as.numeric(surrender_benefit))

# Extracted field: Surrender Benefits, Withdrawals for Life Contracts ($000)
# Surrender benefits and withdrawals for life contracts includes all surrender 
# or other withdrawal benefit amounts incurred in connection with contract provisions for surrender or withdrawal.
# Excludes premium and annuity considerations for life contracts returned, 
# withdrawals on deposit-type contracts, 
# and amounts transferred to premium and annuity considerations,
# separate account or amounts redeposited.


# `Life Industry | LIFE EXHIBIT OF LIFE INSURANCE (PG. 25)`
NAICexhibits <- read.csv2('lifeexhibits.csv') %>% 
  as_tibble %>% 
  filter((nchar(category) > 1) & grepl('Lapsed|Surrendered', category)) %>%
  mutate_at(vars(paste0('Y', 1996:2019)), as.numeric)

NAICexhibits$category %<>% gsub('Policy & Cert', 'Policies', .)
NAICexhibits$category %<>% gsub('Certificates', 'Policies', .)

NAICexhibits <- NAICexhibits %>%
  group_by(type, category) %>%
  summarise_all(sum)

NAICexhibits <-  NAICexhibits %>% 
  gather(year, value, Y1996:Y2019) %>% 
  spread(category,value) %>%
  rename(
    lapse_face = `Insurance Lost: Lapsed`, 
    surrender_face = `Insurance Lost: Surrendered`,
    lapse_count = `Policies Lost: Lapsed`,
    surrender_count = `Policies Lost: Surrendered`) %>%
  mutate(
    terminated_face = lapse_face + surrender_face,
    terminated_count = lapse_count + surrender_count)

# Extracted field 1: Insurance Lost: Surrendered ($000)
# Surrender reports the cancellation from in force of the face amounts 
# (or adjusted amounts of insurance) 
# for policies that were surrendered by the owners for their cash value,
# or where a policy loan indebtedness 
# (loan principal plus accrued interest) 
# reached or exceeded the reserve value causing termination of insurance coverage.

# # Extracted field 2: Insurance Lost: Lapsed ($000)
# Lapse reports cancellation from in force of insurance without nonforfeiture provisions as the result of nonpayment of premiums prior to the normal expiration date of such insurance coverage.

2.4.2 Join two NAIC data tables

2.5 Life Insurance Marketing and Research Association (LIMRA) data

2.5.1 Read LIMRA data from LIMRA (2016)

2.6 National Health Expenditure (NHE) data

2.6.1 Read NHE data from U.S. Centers for Medicare & Medicaid Services (2019)

2.7 Federal Reserve data

2.7.1 Read Federal Reserve data from Board of Governors of the Federal Reserve System (US) (2020)

3 Data analysis

3.3 Life Insurance ownership at household level

3.5 Individual’s life Insurance value versus 401k value

person_data_bycash <- all_wave_annual[, c(
  'swave', 'wpfinwgt', 'tannval', 'tirakeoval', 'tthr401val', 'tlife_cval', 'tlife_fval',
  person_label$colmname)] %>%
  group_by(swave, havecashvalue = is.finite(tlife_cval)) %>% # Differentiate between the universe with cash value and the one without
  summarise_all(list(
    ~sum(., na.rm = T),
    ~weighted.sum(.,wpfinwgt)
  )) %>% 
  select(-wpfinwgt_weighted.sum) %>%
  mutate(cashratio = tlife_cval_weighted.sum/tlife_fval_weighted.sum) # Calculate cash_value:face_falue ratio by group (with ot without cash value)

person_data = person_data_bycash %>% 
  select(-c(havecashvalue,cashratio)) %>%
  group_by(swave) %>% 
  summarise_all(sum) %>%
  mutate(cashratio = tlife_cval_weighted.sum/tlife_fval_weighted.sum) # Calculate aggregate cash_value:face_falue ratio

swave_year = tibble(swave = as.numeric(1:4), year = 2013:2016)

ACLIdata = ACLIdata_all %>% 
  inner_join(swave_year) %>%
  full_join(person_data[, c('swave', 'cashratio')]) %>% # Add aggregate cash_value:face_falue ratio to the ACLI data
  mutate(evmultiplier = reserveratio/cashratio) # Calculate aggregate reserve:cash_value ratio


terminated_ins_ordind = NAICdata_all %>%
  full_join(swave_year) %>%
  filter(is.finite(swave) & (type %in% c('ordind'))) %>%
  select(-type) %>%
  full_join(person_data[, c('swave', 'cashratio')]) %>%
  mutate(
    surbenmultiplier = ben_ter_ratio/cashratio
  ) # Calculate aggregate surrender_benefit:cash_value ratio


person_with_cv <- person_data_bycash %>%
  filter(havecashvalue == T) %>% # Examine only the universe with cash value
  select(swave,
         wpfinwgt_sum, tthr401val_weighted.sum, tlife_cval_weighted.sum, tlife_fval_weighted.sum) %>%
  full_join(ACLIdata %>% filter(type == 'ordind') %>% select(swave, evmultiplier)) %>%
  full_join(terminated_ins_ordind) %>%
  mutate(
    meanfv = tlife_fval_weighted.sum/wpfinwgt_sum, # Mean face value life insurance
    meancv = tlife_cval_weighted.sum/wpfinwgt_sum, # Mean cash value life insurance
    meanev = meancv * evmultiplier, # Mean economic value life insurance
    meansb = meancv * surbenmultiplier, # Mean surrender benefit life insurance
    mean401k = tthr401val_weighted.sum/wpfinwgt_sum, # Mean 401k account value
    cashratio = tlife_cval_weighted.sum/tlife_fval_weighted.sum
  ) %>% 
  ungroup 

EVvs401 <- person_with_cv %>% 
  select(swave, meancv, meanev, meansb, mean401k, meanfv)

References

American Council of Life Insurers. 2019. “Life Insurers Fact Book.” https://www.acli.com/-/media/ACLI/Files/Fact-Books-Public/2019FLifeInsurersFactBook.ashx.

Board of Governors of the Federal Reserve System (US). 2020. “Households; Owner-Occupied Real Estate Including Vacant Land and Mobile Homes at Market Value, Market Value Levels.” https://fred.stlouisfed.org/series/HOOREVLMHMV.

Federal Reserve Board. 2018. “Survey of Consumer Finances (SCF).” https://www.federalreserve.gov/econres/scfindex.htm.

S&P Global Market Intelligence. 2020. “U.S. Life Industry Briefing Book.” https://platform.marketintelligence.spglobal.com/.

US Census Bureau. 2019. “Survey of Income and Program Participation Datasets 2014 Panel.” https://www.census.gov/programs-surveys/sipp/data/datasets.2014.html.

U.S. Centers for Medicare & Medicaid Services. 2019. “National Health Expenditure Data: Historical.” https://www.cms.gov/Research-Statistics-Data-and-Systems/Statistics-Trends-and-Reports/NationalHealthExpendData/NationalHealthAccountsHistorical.

---
title: "Unlocking the Value of a Life in the Time of COVID-19"
author: "Alexander Braun, Lauren H. Cohen, Jiahua Xu"
date: "`r format(Sys.time(), '%B %d, %Y')`"
output:
  html_notebook:
    fig_caption: yes
    number_sections: yes
    toc: yes
    toc_depth: 2
    toc_float: yes
    highlight: tango
  html_document:
    df_print: paged
    fig_caption: yes
    number_sections: yes
    toc: yes
    toc_depth: 2
    toc_float: yes
  word_document:
    fig_caption: yes
    toc: yes
    toc_depth: 2
link-citations: yes
bibliography: lifesettlement.bib
subtitle: Supplementary Material
biblio-style: apelike
---

# Basic set-up

```{r setup, echo = T, error = F, cache = F, message = F, warning = F}
# Load libaries
library(data.table)
library(foreign)
library(magrittr)
library(readstata13)
library(haven)
library(tibble)
library(dplyr)
library(tidyr)
library(readxl)
library(reshape2)
library(ggplot2)
library(alfred)
library(lodown)
library(survey)
library(mitools)
library(knitr) # for `opts_chunk$set`
library(R.utils) # for `gunzip`

## General settings for code chunks
opts_chunk$set(#dev='tikz', 
               fig.path = './figure/', 
               echo = T,
               error = F, cache = F, message = F, warning = F
               # ,
               # results='hide',
               # autodep=T,
               # fig.height=6
               )

options(stringsAsFactors = F)

ownfract = function(x, wt){
  sum(wt[x!=0])/sum(wt)
}

weighted.sum = function(x, wt){
  sum(x*wt, na.rm = T)
}
```

# Data collection

## Survey of Consumer Finances (SCF) data

### Download SCF data from @FederalReserveBoard2018

```{r Get_SCF_data, eval = F}
# Download all SCF data into directory `SCF`
# Instructions: http://asdfree.com/survey-of-consumer-finances-scf.html
lodown('scf', output_dir = 'SCF') # Only needs to be run once
```

### Read SCF data

```{r Read_SCF_data, eval = T}
# Read only 2016 SCF data
scf_imp <- readRDS('SCF/scf 2016.rds')
scf_rw <- readRDS('SCF/scf 2016 rw.rds')

scf_design <- 
  svrepdesign( 
    weights = ~wgt, 
    repweights = scf_rw[, -1] , 
    data = imputationList(scf_imp) , 
    scale = 1 ,
    rscales = rep( 1 / 998, 999) ,
    mse = F,
    type = 'other',
    combined.weights = T
  )
```


## Survey of Income and Program Participation (SIPP) Datasets

### Download SIPP 2014 Panel Data from @USCensusBureau2019

```{r Get_SIPP_data, eval = F}
# A simple function to download and unzip SIPP data
download_sipp <- function(wave){
  file_name <- paste0('pu2014w',wave,'.dta')
  gzip_file <- paste0(file_name, '.gz')
  
  # Download all SIPP data into directory `SIPP`
  dest_file <- paste0('./SIPP/',gzip_file)
  
  download.file(
    url = paste0('https://www2.census.gov/programs-surveys/sipp/data/datasets/2014/w', wave, '/', gzip_file),
    destfile = dest_file
    )
  
  gunzip(dest_file)
  
  # print uncompressed file size
  print(
    paste(file_name, 'size:', round(file.size(file_name)/1e9), 'G')
  )
}


download_sipp(4) # Download and unzip wave 4 data (year 2016)
download_sipp(3) # Download and unzip wave 3 data (year 2015)
download_sipp(2) # Download and unzip wave 2 data (year 2014)
download_sipp(1) # Download and unzip wave 1 data (year 2013)
```

### Read SIPP 2014 Panel Data

```{r Read_Census_Data, eval = F}
# `labels.csv` generated by manually copying and cleaning `Table of Contents` of `SIPP Public Use Metadata Report`:
# `https://www2.census.gov/programs-surveys/sipp/data/datasets/2014/w4/2014SIPP_W4_Metadata_AllSections.pdf`
labels = read.csv2('labels.csv')

# Select fields that we are interested in
col_sel = c(
  'ssuid', # Sample unit identifier. Basically household ID
  'shhadid', # Household address ID
  'pnum', # Person number.
  'wpfinwgt', # Final person weight
  'erelrpe', # Need to calculate household weight
  'monthcode', 
  'swave',
  'eresidenceid',
  'esex',
  'tage',
  'tprval', # Property value of primary residence
  'tprloanamt', # Principal owed on the first three mortgages and loans against the primary residence
  'tirakeoval',# Value of IRA and KEOGH accounts
  'tthr401val', # Value of 401k, 403b, 503b, and Thrift Savings Plan accounts
  'elife_type', # Type of life insurance policy owned, 1 Term, 2 Whole, 3 Both
  'tlife_cval',
  'tlife_fval',
  'tlifeamt', # What was the total amount of life insurance payments ... received
  'tsur7amt',
  'tpothinc',
  'thtotinc',
  'totcmdpay',
  'tmdpay'
) %>% union(labels$label[labels$category == 'Asset_Amount'] %>% tolower) %>%
  setdiff(c('tmhloan_num', 'tbus_inv_num')) ## 'tmhloan_num' and 'tbus_inv_num' are not included in all waves (and we don't need them)

time = Sys.time()
wealth16.all <- read_stata('SIPP/pu2014w4.dta', col_select = all_of(col_sel)) %>% distinct
# Use distinct in case of duplicate records
time - Sys.time()

wealth15.all <- read_stata('SIPP/pu2014w3.dta', col_select = all_of(col_sel)) %>% distinct
wealth14.all <- read_stata('SIPP/pu2014w2.dta', col_select = all_of(col_sel)) %>% distinct
wealth13.all <- read_stata('SIPP/pu2014w1.dta', col_select = all_of(col_sel)) %>% distinct

all_wave <- rbind(wealth13.all, wealth14.all, wealth15.all, wealth16.all)

# We only need year-level data, so extract month 12
# https://www2.census.gov/programs-surveys/sipp/Select_approp_wgt_2014SIPPpanel.pdf
all_wave_annual <- all_wave %>% filter(monthcode == 12) %>% select(-monthcode)

# Save intermediary dataset so that we don't need to read all the data each time
save(all_wave_annual, file = 'all_wave_annual.rda')
```

```{r Load_Census_Data, echo = T}
# Load data saved from previous chunk
load('all_wave_annual.rda')
         
# extract name and description of fields collected
colname_label <- all_wave_annual %>% 
  sapply(function(x) attr(x,'label')) %>% 
  as.data.table(keep.rownames = T) %>% 
  setnames(c('colmname', 'description'))

# extract all Person-level fields
person_label <- colname_label[
  !grepl('debt', colmname, ignore.case = T) &
  grepl('Person-level ', description, ignore.case = T)
  ]

# ultimate fields selected
col_sel_ult <- c('swave', 'erelrpe', 'ssuid', 'wpfinwgt', 'tannval', 'tirakeoval', 'tthr401val', 'tlife_cval', 'tlife_fval') %>%
  union(person_label$colmname)

# field code and description
colname_label[colmname %in% col_sel_ult] %>% kable

# subset data
all_wave_annual <- all_wave_annual[, col_sel_ult]

all_wave_annual
```


## American Council of Life Insurers (ACLI) data

### Read ACLI data from @AmericanCouncilofLifeInsurers2019

```{r Load_ACLI_Data}
# Sheet `FaceInForce` generated by manually copying 
# Table 7.9 Life Insurance in Force in the United States, by Year (millions)
acli_face <- read_excel('acli.xlsx', 
                        sheet = 'FaceInForce', 
                        col_types = c("text", "skip", "numeric", "skip", "numeric", "skip", "numeric", "skip", "numeric")
                        ) %>%
  as_tibble %>%
  gather(type, faceinforce, ordind:total)

# Sheet `Reserve` generated by manually copying 
# Table 3.7 Life Insurance Policy Reserves, by Type and Year (millions)
acli_reserve <- read_excel("acli.xlsx",
                           sheet = "Reserve", 
                           col_types = c("text", "numeric", "numeric", "numeric", "numeric")
                           )%>%
  as_tibble %>%
  gather(type, reserve, ordind:total)
```

### Join two ACLI data tables

```{r Join_ACLI_Data, echo = T}
ACLIdata_all <- inner_join(acli_face,acli_reserve) %>%
  mutate(year = as.integer(gsub('[^0-9]', '',Year))) %>%
  select(-Year) %>% 
  mutate(reserveratio = reserve/faceinforce)


ACLIdata_all # dollar amount in million ($000,000)
```


## National Association of Insurance Commissioners (NAIC) data

### Read NAIC data from @GlobalMarketIntelligence2020

```{r Load_NAIC_Data}
# Data manually downloaded from `Financials, U.S. Life Statutory, NAIC Format`

# `Life Industry | LIFE ANALYSIS OF OPERATIONS BY LOB (PG. 6)`
NAICsurrender = read.csv2('surrender.csv') %>% as_tibble %>%
  filter(type %in% c('indl', 'ordinary', 'credit', 'group')) %>%
  gather(year, surrender_benefit, Y1996:Y2019) %>%
  mutate(surrender_benefit = as.numeric(surrender_benefit))

# Extracted field: Surrender Benefits, Withdrawals for Life Contracts ($000)
# Surrender benefits and withdrawals for life contracts includes all surrender 
# or other withdrawal benefit amounts incurred in connection with contract provisions for surrender or withdrawal.
# Excludes premium and annuity considerations for life contracts returned, 
# withdrawals on deposit-type contracts, 
# and amounts transferred to premium and annuity considerations,
# separate account or amounts redeposited.


# `Life Industry | LIFE EXHIBIT OF LIFE INSURANCE (PG. 25)`
NAICexhibits <- read.csv2('lifeexhibits.csv') %>% 
  as_tibble %>% 
  filter((nchar(category) > 1) & grepl('Lapsed|Surrendered', category)) %>%
  mutate_at(vars(paste0('Y', 1996:2019)), as.numeric)

NAICexhibits$category %<>% gsub('Policy & Cert', 'Policies', .)
NAICexhibits$category %<>% gsub('Certificates', 'Policies', .)

NAICexhibits <- NAICexhibits %>%
  group_by(type, category) %>%
  summarise_all(sum)

NAICexhibits <-  NAICexhibits %>% 
  gather(year, value, Y1996:Y2019) %>% 
  spread(category,value) %>%
  rename(
    lapse_face = `Insurance Lost: Lapsed`, 
    surrender_face = `Insurance Lost: Surrendered`,
    lapse_count = `Policies Lost: Lapsed`,
    surrender_count = `Policies Lost: Surrendered`) %>%
  mutate(
    terminated_face = lapse_face + surrender_face,
    terminated_count = lapse_count + surrender_count)

# Extracted field 1: Insurance Lost: Surrendered ($000)
# Surrender reports the cancellation from in force of the face amounts 
# (or adjusted amounts of insurance) 
# for policies that were surrendered by the owners for their cash value,
# or where a policy loan indebtedness 
# (loan principal plus accrued interest) 
# reached or exceeded the reserve value causing termination of insurance coverage.

# # Extracted field 2: Insurance Lost: Lapsed ($000)
# Lapse reports cancellation from in force of insurance without nonforfeiture provisions as the result of nonpayment of premiums prior to the normal expiration date of such insurance coverage.
```

### Join two NAIC data tables

```{r Join_NAIC_Data, echo = T}
NAICdata_all = NAICexhibits %>%
  full_join(NAICsurrender) %>%
  select(type, year, 
         lapse_face, surrender_face, terminated_face, 
         lapse_count, surrender_count, terminated_count,
         surrender_benefit) %>%
  gather(category, value, lapse_face:surrender_benefit) %>% 
  spread(type,value) %>%
  mutate(ordind = indl + ordinary) %>% # combine `indl` and `ordinary` to align with ACLI data
  select(-c(indl, ordinary)) %>%
  mutate(total = ordind + credit + group) %>% # calculate total
  gather(type, value, credit:total) %>% 
  spread(category, value) %>%
  mutate(year = as.integer(gsub('[^0-9]', '', year)),
         ben_sur_ratio = surrender_benefit/surrender_face, # calculate benefit:face ratio for surrendered policies
         ben_ter_ratio = surrender_benefit/terminated_face # calculate benefit:face ratio for all terminated policies
         )

NAICdata_all # dollar amount in thousand ($000)
```


## Life Insurance Marketing and Research Association (LIMRA) data

### Read LIMRA data from @LIMRA2016

```{r Get_LIMRA_Data, echo = T}
# Data downloaded from `'https://research.limra.com/vizql/w/2016OwnershipinFocus/v/HistoricalTrends/vud/sessions/0C8191433C6F465CA9B2EEF8EE405BB2-1:1/views/4131351539022289803_3722576010483076992?csv=true&summary=true`
LIMRA_data <- read.csv2('ChangeInRate_data.csv') %>% data.table

LIMRA_data
```

## National Health Expenditure (NHE) data

### Read NHE data from @CMS2019

```{r Get_NHE_Data, echo = T}
# Data downloaded from `https://www.cms.gov/files/zip/nhe-tables.zip`
nursing <- read.csv('NHE2018/NHE2018.csv', skip = 1)
names(nursing)[1] = 'expenditure'
nursing = nursing[
  nursing$expenditure == 'Total Nursing Care Facilities and Continuing Care Retirement Communities',
                  ] %>%
  t %>%
  as_tibble(rownames = 'year') 

names(nursing)[2] = 'nurs_expense'

nursing <- nursing %>%
  mutate(
    year = gsub('[^0-9]', '',year) %>% as.numeric,
    nurs_expense = as.numeric(gsub('[^0-9.]', '',nurs_expense)) / 1000
    ) %>%
  filter(is.finite(year))

nursing # dollar amount in billion ($000,000,000)
```


## Federal Reserve data

### Read Federal Reserve data from @fred2020

```{r Get_fred_Data, echo = T}
# Households; Owner-Occupied Real Estate at Market Value, Level
# https://fred.stlouisfed.org/series/BOGZ1FL155035013Q
real_state <- read.csv('BOGZ1FL155035013Q.csv') %>%
  as_tibble %>%
  filter(DATE > 2000, grepl('-10-01', DATE)) %>% # Get last quarter of the year since 2020
  transmute(
    end_of_year = DATE %>% as.Date %>% format('%Y') %>% as.numeric,
    `real_estate_value` = as.numeric(BOGZ1FL155035013Q)/1e6
    )


# # Households; Owner-Occupied Real Estate *Including Vacant Land and Mobile Homes at Market Value, Market Value Levels*
# # https://fred.stlouisfed.org/series/HOOREVLMHMV
# real_state <- read.csv('HOOREVLMHMV.csv') %>%
#   as_tibble %>%
#   filter(DATE > 2000, grepl('-10-01', DATE)) %>% # Get last quarter of the year since 2020
#   transmute(
#     end_of_year = DATE %>% as.Date %>% format('%Y') %>% as.numeric,
#     `real_estate_value` = as.numeric(HOOREVLMHMV)/1e3
#     )


real_state # dollar amount in trillion ($000,000,000,000)
```

# Data analysis

## Life insurance termination

```{r rsloss, echo = T}
rsloss <- real_state %>% 
  filter(end_of_year %in% c(2006, 2009)) %>% # finacial crisis 2007 (i.e. end of 2006) to 2009
  apply(2, diff)

paste0('Market loss of household real estate p.a.: ', -round(rsloss[2]/rsloss[1],2),
       ' trillion USD') %>% print
```

## Households' real estate market loss during the global financial crisis 
```{r termination, echo = T}
start_year = 2017
termination <- NAICdata_all %>%
  filter(type == 'total' & year >= start_year) %>%
  mutate(
    `terminated_count (million)` = terminated_count/1e6,
    `terminated_face (trillion USD)` = terminated_face/1e9
    ) %>%
  select(year, `terminated_count (million)`, `terminated_face (trillion USD)`) 

paste0('Terminated policies each year since ', start_year, ': ', 
       termination$`terminated_count (million)` %>% min %>% round,
       ' million') %>% print
paste0('Terminated face on average each year since ', start_year, ': ', 
       termination$`terminated_face (trillion USD)` %>% mean %>% round(2),
       ' trillion USD') %>% print
```

## Life Insurance ownership at household level

### Estimate with LIMRA data
```{r LIMRA_estmiate, echo = T}
paste0('LIMRA estmiate (2016): ', 
       LIMRA_data[Ownership.Type == 'Owns Any Life' & Year == 2016]$Percent
       ) %>% print
```

### Estimate with SCF data

```{r SCF_estmiate, echo = T}
# Investigate insurance ownership
# x4001: Do you (or anyone in your family living here) have any life insurance? 
# 1. YES
# 5. NO
# https://www.federalreserve.gov/apps/scfcb/detail/2948/life%20insurance

# http://asdfree.com/survey-of-consumer-finances-scf.html
# Unweighted count
# scf_design %>% with(svyby(~one , ~x4001 , unwtd.count)) %>% scf_MIcombine
# Get close result with: scf_design$designs[[1]]$variables$x4001 %>% table

# Count the weighted size of the generalizable population by group x4001:
pop_by_lifeown = scf_design %>% 
  with(svyby(~one , ~x4001 , svytotal)) %>%
  scf_MIcombine

# Extract weighted count of each answer (1. YES / 5. NO) 
pop_by_lifeown_res = pop_by_lifeown$coefficients

# Proportion of life insurance ownership
scf_est <- pop_by_lifeown_res[1]/sum(pop_by_lifeown_res)

paste0('SCF estmiate (2016): ', round(scf_est*100), '%') %>% print
```

### Estimate with SIPP data

```{r SIPP_estmiate, echo = T}
hh_lifeown = all_wave_annual %>% 
  select(swave, wpfinwgt, tlife_fval, erelrpe, ssuid) %>%
  group_by(swave, ssuid) %>%
  mutate(with_life_ins = any(is.finite(tlife_fval))) %>% # return `True` if anybody in the household has life insurance
  filter((erelrpe == 1 | erelrpe == 2) & is.finite(wpfinwgt)) %>% # select householder
  group_by(swave,with_life_ins) %>% 
  summarise(count_household = sum(wpfinwgt))

# Only consider 2016 data to align with SCF
hh_lifeown_2016 <- hh_lifeown %>% filter(swave == 4)

# Proportion of life insurance ownership
sipp_est <- (hh_lifeown_2016 %>% filter(with_life_ins == T) %>% .$count_household)/sum(hh_lifeown_2016$count_household)

paste0('SIPP estmiate (2016): ', round(sipp_est*100), '%') %>% print
```

### Estimate with ACLI data

```{r ACLI_estmiate, echo = T}
# Chapter 7 LIFE INSURANCE (p. 63)
# 75 million households rely on life insurance and/or non-qualified annuities

# Total household data
# `https://www2.census.gov/programs-surveys/demo/tables/families/time-series/households/hh1.xls`
# Year	Total households (Numbers in thousands)
# 2019	128,579
# 2018	127,586

paste0('ACLI estmiate (2018): ', round(75/127.586*100), '%') %>% print
```


## Life insurance premium

```{r premium, echo = T}

# Net Premiums & Annuity Considerations: Life, A&H, 2019 Y (31/12/2019)
# Life Industry | LIFE EXHIBIT 1 PART 1 & 2 (PG. 9, 10)
total_prem <- sum(
107055, # Indl Life
121172176, # Ordinary Life
29745999, # Group Life Insurance
639924 # Credit Life
)/1e6

# Life Industry | LIFE INCOME STATEMENT
# Revenue
# Life Insurance Premiums
# 115,034,222 #2016 Y
# 137,148,663 #2017 Y
# 145,090,526 #2018 Y

paste0('Total life insurance premium in 2019: ', round(total_prem), ' billion USD') %>% print
```


## Individual's life Insurance value versus 401k value

```{r data, echo = T}
person_data_bycash <- all_wave_annual[, c(
  'swave', 'wpfinwgt', 'tannval', 'tirakeoval', 'tthr401val', 'tlife_cval', 'tlife_fval',
  person_label$colmname)] %>%
  group_by(swave, havecashvalue = is.finite(tlife_cval)) %>% # Differentiate between the universe with cash value and the one without
  summarise_all(list(
    ~sum(., na.rm = T),
    ~weighted.sum(.,wpfinwgt)
  )) %>% 
  select(-wpfinwgt_weighted.sum) %>%
  mutate(cashratio = tlife_cval_weighted.sum/tlife_fval_weighted.sum) # Calculate cash_value:face_falue ratio by group (with ot without cash value)

person_data = person_data_bycash %>% 
  select(-c(havecashvalue,cashratio)) %>%
  group_by(swave) %>% 
  summarise_all(sum) %>%
  mutate(cashratio = tlife_cval_weighted.sum/tlife_fval_weighted.sum) # Calculate aggregate cash_value:face_falue ratio

swave_year = tibble(swave = as.numeric(1:4), year = 2013:2016)

ACLIdata = ACLIdata_all %>% 
  inner_join(swave_year) %>%
  full_join(person_data[, c('swave', 'cashratio')]) %>% # Add aggregate cash_value:face_falue ratio to the ACLI data
  mutate(evmultiplier = reserveratio/cashratio) # Calculate aggregate reserve:cash_value ratio


terminated_ins_ordind = NAICdata_all %>%
  full_join(swave_year) %>%
  filter(is.finite(swave) & (type %in% c('ordind'))) %>%
  select(-type) %>%
  full_join(person_data[, c('swave', 'cashratio')]) %>%
  mutate(
    surbenmultiplier = ben_ter_ratio/cashratio
  ) # Calculate aggregate surrender_benefit:cash_value ratio


person_with_cv <- person_data_bycash %>%
  filter(havecashvalue == T) %>% # Examine only the universe with cash value
  select(swave,
         wpfinwgt_sum, tthr401val_weighted.sum, tlife_cval_weighted.sum, tlife_fval_weighted.sum) %>%
  full_join(ACLIdata %>% filter(type == 'ordind') %>% select(swave, evmultiplier)) %>%
  full_join(terminated_ins_ordind) %>%
  mutate(
    meanfv = tlife_fval_weighted.sum/wpfinwgt_sum, # Mean face value life insurance
    meancv = tlife_cval_weighted.sum/wpfinwgt_sum, # Mean cash value life insurance
    meanev = meancv * evmultiplier, # Mean economic value life insurance
    meansb = meancv * surbenmultiplier, # Mean surrender benefit life insurance
    mean401k = tthr401val_weighted.sum/wpfinwgt_sum, # Mean 401k account value
    cashratio = tlife_cval_weighted.sum/tlife_fval_weighted.sum
  ) %>% 
  ungroup 

EVvs401 <- person_with_cv %>% 
  select(swave, meancv, meanev, meansb, mean401k, meanfv)
```


```{r plot}
# Prepare data frame for plotting
plotdf <- (EVvs401 %>% select(-swave) %>% cbind(nul = NA) %>% t)/1000

plotdf[c('mean401k', 'meanev'),] %>% rowMeans %>% 
  write.table('ev401kplot.csv', dec = '.', sep = ';', row.names = F)

barplot(plotdf[c('mean401k', 'meanev'),] %>% rowMeans,
        horiz = T,
        space = c(0.1,0.5),
        ylim = c(0,4))
```

```{r}

ymax = max(plotdf, na.rm = T) %>% ceiling

basecol = c('springgreen4', 'blue', 'orange', 'grey', 'red')
bordercol = c(basecol[1], NA, NA, basecol[4], basecol[5])

adjustedcol = c(
  adjustcolor(basecol[1], alpha.f = 0.7),
  adjustcolor(basecol[2], alpha.f = 0.8),
  adjustcolor(basecol[3], alpha.f = 1),
  adjustcolor(basecol[4], alpha.f = 0.6),
  adjustcolor(basecol[5], alpha.f = 0)
)

dens = 17

par(xpd = T, mar = c(2, 4, 6, 0.4), lty = 1, lwd = 1, mgp = c(0,0.75,0))

barplot(plotdf[c('meanev', 'mean401k'),], 
        ylim = c(0, ymax), 
        beside = T, 
        names.arg = 2013:2016,
        col = adjustedcol[c(1,4)],
        border = basecol[c(1,4)],
        space = c(0.1,0.9))
title('Average individual holdings \nUniverse: policyholders w. cash value > 0', 
      line = 0.2, cex.main = 0.9)
barplot(plotdf[c('meancv', 'nul'),], 
        beside = T, 
        add = T,
        density = dens,
        col = adjustedcol[c(2,4)],
        border = bordercol[c(2,4)],
        space = c(0.1,0.9))
barplot(plotdf[c('meansb', 'nul'),], 
        beside = T, 
        add = T,
        density = c(dens,NA),
        angle = 135,
        col = adjustedcol[c(3,4)],
        border = bordercol[c(3,4)],
        space = c(0.1,0.9))

legend(8.9, ymax*1.3, 
       legend = c('Economic value', 'Cash value', 'Surrender benefit', '401k account value'),
       fill = adjustedcol,
       border = bordercol,
       density = c(NA, dens, dens, NA),
       angle = c(NA, 45, 135, NA),
       cex = 0.8, yjust = 1, bty = 'n')

par(lwd=0.5, lty = 2)

barplot(plotdf[c('meanfv', 'nul'),], 
        beside = T, 
        add = T,
        density = c(dens,NA),
        angle = 135,
        col = adjustedcol[c(5,4)],
        border = bordercol[c(5,4)],
        space = c(0.1,0.9))

legend(8.9, ymax*1.13, 
       legend = c('Face amount'),
       fill = NA,
       border = bordercol[5],
       cex = 0.8, bty = 'n')

text(x = 0, y = ymax, 'thousand USD', adj = c(1,0))
text(x = seq(1.4,length.out = 4, by = 3), 
     t(plotdf[c('meanev'), ]) + 0.5, 
     plotdf[c('meanev'), ] %>% t %>% round, 
     adj = c(0.5,0), font=2)
text(x = seq(1.4,length.out = 4, by = 3), 
     t(plotdf[c('meansb', 'meancv'), ]) - 0.5, 
     plotdf[c('meansb', 'meancv'), ] %>% t %>% round, 
     adj = c(0.5,1), cex = 0.8)
text(x = seq(2.5,length.out = 4, by = 3), 
     t(plotdf[c('mean401k'), ]) + 0.5, 
     plotdf[c('mean401k'), ] %>% t %>% round, 
     adj = c(0.5,0), font=2)

text(x = seq(1.4,length.out = 4, by = 3), 
     t(plotdf[c('meanfv'), ]), 
     plotdf[c('meanfv'), ] %>% t %>% round, 
     adj = c(0.5,0.5), font=3, col = 'red', cex = 0.9)


valueloss_all = NAICdata_all %>% 
  inner_join(ACLIdata_all) %>%
  mutate(
    totalev = terminated_face * reserveratio,
    totalvalueloss = totalev - surrender_benefit
  ) %>% 
  filter(type != 'total') %>%
  select(year, type, lapse_face, surrender_face, terminated_face, totalev, surrender_benefit, totalvalueloss) %>%
  gather(category, value, lapse_face:totalvalueloss) %>%
  group_by(year,category) %>%
  summarise(value = sum(value)/1e6) %>%
  spread(category, value) %>%
  inner_join(nursing)

valueloss_all %>% 
  select(year, totalvalueloss, nurs_expense) %>%
  write.table('valueloss_all.csv', dec = '.', sep = ';', row.names = F)

## plot value loss ----

par(xpd = T, mar = c(2, 4.5, 6, 1.5), lty = 1, lwd = 1, mgp = c(0,0.75,0))
vl_col = 'red'
adjusted_vl_col = adjustcolor(vl_col, alpha.f = 0.7)

ymax = 250
plot(valueloss_all$year, 
     valueloss_all$totalev, 
     type = 'l', 
     ylim = c(0, ymax),
     yaxs = 'i',
     xaxs = 'i',
     xaxt = 'n',
     col = basecol[1],
     lwd = 2,
     bty = 'l',
     ylab = '',
     xlab = '',
     main = 'US aggregate')

axis(1, 
     at=seq(to = 2018, by = 3, length.out = 8))

polygon(
  valueloss_all$year %>% c(rev(range(.)), .),
  c(0,0,valueloss_all$totalev),
  col = adjustedcol[1],
  border = NA
  )

lines(valueloss_all$year, 
      valueloss_all$surrender_benefit,
      lwd = 2,
      col = basecol[3])

polygon(
  valueloss_all$year %>% c(rev(range(.)), .),
  c(0,0,valueloss_all$surrender_benefit),
  density = dens,
  angle = 135,
  col = adjustedcol[3],
  border = NA
)

lines(valueloss_all$year, 
      valueloss_all$totalvalueloss,
      lwd = 2,
      col = adjusted_vl_col)

lines(valueloss_all$year, 
      valueloss_all$nurs_expense,
      lwd = 2,
      lty = 2,
      col = 'grey30')

ind = seq(to = (valueloss_all$year %>% range %>% diff) + 1, by = 6, length.out = 4)

text(x = 1996, y = ymax*1.1, 'billion USD', adj = c(1,0))

valueloss_all$totalev[ind] %>% 
  text(valueloss_all$year[ind], 
       .,
       round(.),
       col = basecol[1],
       adj = c(0.5,0))

valueloss_all$nurs_expense[ind] %>% 
  text(valueloss_all$year[ind], 
     .,
     round(.),
     col = 'grey10',
     adj = c(0,1))

valueloss_all$totalvalueloss[ind] %>% 
  text(valueloss_all$year[ind], 
       .,
       round(.),
       col = vl_col,
       adj = c(1,0))

valueloss_all$surrender_benefit[ind] %>% 
  text(valueloss_all$year[ind], 
       .,
       round(.),
       col = basecol[3],
       adj = c(0.5,0))

legend(
  'topleft',
  bty = 'n',
  legend = c('Economic value of terminated policies (EV)',
             'Surrender benefit of terminated policies (SB)',
             'Value loss due to termination (EV-SB)',
             'Nursing care spending'),
  lty = c(1,1,1,2),
  lwd = 2,
  col = c(basecol[c(1,3)], vl_col, 'grey10'),
  cex = 0.8
)
```


# References {-}
