# Load packages
library(ggplot2)
library(dplyr)

# Import data
census <- read.csv("/resources/rstudio/BusinessStatistics/Data/census.csv")

str(census)
## 'data.frame':    30 obs. of  39 variables:
##  $ GEOID                         : num  3.3e+09 3.3e+09 3.3e+09 3.3e+09 3.3e+09 ...
##  $ NAME                          : Factor w/ 30 levels "Alexandria town, Grafton County, New Hampshire",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ popTotal                      : int  1876 5285 2658 2159 4631 7281 987 3050 1011 1294 ...
##  $ medianAge                     : num  45 45.8 46.5 37.8 41.8 43.8 57.6 41.4 51.5 44.6 ...
##  $ popNative_USA                 : int  1833 4975 2554 2094 4518 7223 966 3003 989 1268 ...
##  $ popNative                     : int  1835 5041 2573 2142 4533 7236 976 3026 989 1279 ...
##  $ popNaitve_NH                  : int  1008 2450 1418 1008 2273 4420 425 1611 384 659 ...
##  $ popMoved_otherState           : int  10 72 13 41 37 60 31 84 43 6 ...
##  $ popMoved_abroad               : int  5 0 13 0 0 11 0 9 0 0 ...
##  $ popCommute_car                : int  755 2298 1178 993 2537 3615 383 1381 524 696 ...
##  $ popCommute_publicT            : int  0 0 0 0 13 0 0 0 0 11 ...
##  $ popCommute_bicycle            : int  0 0 19 0 0 0 0 0 0 0 ...
##  $ popCommute_foot               : int  6 24 36 143 33 0 0 96 4 0 ...
##  $ popCommute_other              : int  14 12 5 8 0 54 0 6 3 3 ...
##  $ popCommute_home               : int  29 277 182 24 79 39 44 48 49 43 ...
##  $ popBA                         : int  199 1274 742 531 783 928 271 484 374 209 ...
##  $ popPov                        : int  159 305 176 457 234 620 45 323 79 32 ...
##  $ medianIncome                  : int  56597 76676 69489 44000 70037 60938 53958 54015 70625 61058 ...
##  $ earningsAgg                   : int  31933300 127223500 65678000 36025500 118527900 150122700 21898000 54045100 27997300 31278900 ...
##  $ earningsAgg_WageSalary        : int  29137500 118023200 57790900 34825100 112078200 142960400 19791000 51913800 23983000 26628900 ...
##  $ earningsAgg_SelfEmpl          : int  2795800 9200300 7887100 1200400 6449800 7162300 2106900 2131300 4014200 4650000 ...
##  $ LF                            : int  905 2775 1559 1232 2905 3904 459 1618 611 794 ...
##  $ LF_Civilian                   : int  905 2775 1559 1232 2905 3904 459 1610 611 794 ...
##  $ LF_Civilian_Unemployed        : int  68 120 129 59 180 174 21 40 31 28 ...
##  $ LF_Not                        : int  631 1526 689 590 992 2117 442 875 313 267 ...
##  $ housingTotal                  : int  960 4437 1251 1445 2415 3610 1033 2333 785 735 ...
##  $ housingVacant_rent            : int  0 38 0 28 0 83 0 0 0 0 ...
##  $ housingVacant_seasonal        : int  259 2123 151 326 618 585 529 1017 331 115 ...
##  $ medianHomeValue               : int  178800 279000 208000 155600 177500 186100 253200 205600 334000 165900 ...
##  $ medianGrossRent               : int  869 798 1125 834 1068 892 1027 824 1083 941 ...
##  $ unemplRate                    : num  7.51 4.32 8.27 4.79 6.2 ...
##  $ LFparticipationRate           : num  58.9 64.5 69.4 67.6 74.5 ...
##  $ housing_percentOfseasonal     : num  27 47.8 12.1 22.6 25.6 ...
##  $ housing_percentOfrent         : num  0 0.856 0 1.938 0 ...
##  $ earnings_percenOfcurrentlabor : num  100 100 100 100 100 ...
##  $ popPov_percent                : num  8.48 5.77 6.62 21.17 5.05 ...
##  $ popBA_percent                 : num  10.6 24.1 27.9 24.6 16.9 ...
##  $ housingVacant_seasonal_percent: num  27 47.8 12.1 22.6 25.6 ...
##  $ housingVacant_rent_percent    : num  0 0.856 0 1.938 0 ...

summary(census)
##      GEOID                                                       NAME   
##  Min.   :3.300e+09   Alexandria town, Grafton County, New Hampshire: 1  
##  1st Qu.:3.300e+09   Alton town, Belknap County, New Hampshire     : 1  
##  Median :3.300e+09   Andover town, Merrimack County, New Hampshire : 1  
##  Mean   :3.301e+09   Ashland town, Grafton County, New Hampshire   : 1  
##  3rd Qu.:3.301e+09   Barnstead town, Belknap County, New Hampshire : 1  
##  Max.   :3.301e+09   Belmont town, Belknap County, New Hampshire   : 1  
##                      (Other)                                       :24  
##     popTotal       medianAge     popNative_USA     popNative    
##  Min.   :  582   Min.   :37.80   Min.   :  550   Min.   :  557  
##  1st Qu.: 1483   1st Qu.:44.77   1st Qu.: 1452   1st Qu.: 1457  
##  Median : 2934   Median :48.75   Median : 2865   Median : 2889  
##  Mean   : 3774   Mean   :48.59   Mean   : 3645   Mean   : 3669  
##  3rd Qu.: 4774   3rd Qu.:53.00   3rd Qu.: 4576   3rd Qu.: 4615  
##  Max.   :16171   Max.   :59.10   Max.   :15511   Max.   :15613  
##                                                                 
##   popNaitve_NH    popMoved_otherState popMoved_abroad  popCommute_car  
##  Min.   : 202.0   Min.   :  6.00      Min.   : 0.000   Min.   : 234.0  
##  1st Qu.: 698.5   1st Qu.: 23.25      1st Qu.: 0.000   1st Qu.: 710.8  
##  Median :1316.0   Median : 42.00      Median : 0.000   Median :1397.5  
##  Mean   :1881.0   Mean   : 77.80      Mean   : 6.467   Mean   :1670.0  
##  3rd Qu.:2405.8   3rd Qu.: 87.25      3rd Qu.:10.500   3rd Qu.:2273.0  
##  Max.   :9378.0   Max.   :470.00      Max.   :49.000   Max.   :7069.0  
##                                                                        
##  popCommute_publicT popCommute_bicycle popCommute_foot popCommute_other
##  Min.   : 0.0       Min.   : 0.000     Min.   :  0.0   Min.   :  0.00  
##  1st Qu.: 0.0       1st Qu.: 0.000     1st Qu.:  6.5   1st Qu.:  3.50  
##  Median : 0.0       Median : 0.000     Median : 15.0   Median : 13.50  
##  Mean   : 3.9       Mean   : 2.567     Mean   : 34.6   Mean   : 35.47  
##  3rd Qu.: 0.0       3rd Qu.: 0.000     3rd Qu.: 34.0   3rd Qu.: 51.00  
##  Max.   :72.0       Max.   :27.000     Max.   :238.0   Max.   :225.00  
##                                                                        
##  popCommute_home      popBA            popPov        medianIncome  
##  Min.   : 21.00   Min.   : 159.0   Min.   :  30.0   Min.   :44000  
##  1st Qu.: 36.00   1st Qu.: 401.5   1st Qu.: 146.2   1st Qu.:52204  
##  Median : 58.50   Median : 651.0   Median : 215.0   Median :61438  
##  Mean   : 93.07   Mean   : 833.0   Mean   : 390.8   Mean   :59857  
##  3rd Qu.:134.00   3rd Qu.: 973.8   3rd Qu.: 440.5   3rd Qu.:68043  
##  Max.   :360.00   Max.   :2995.0   Max.   :2557.0   Max.   :76676  
##                                                                    
##   earningsAgg        earningsAgg_WageSalary earningsAgg_SelfEmpl
##  Min.   : 11992400   Min.   : 10745100      Min.   : 1200400    
##  1st Qu.: 31442500   1st Qu.: 27256050      1st Qu.: 3072075    
##  Median : 67989550   Median : 60202850      Median : 6436850    
##  Mean   : 84895040   Mean   : 77478727      Mean   : 7416153    
##  3rd Qu.:117084150   3rd Qu.:110519975      3rd Qu.: 9046525    
##  Max.   :353736200   Max.   :325047100      Max.   :28684400    
##                                                                 
##        LF          LF_Civilian     LF_Civilian_Unemployed     LF_Not      
##  Min.   : 288.0   Min.   : 288.0   Min.   : 17.00         Min.   : 233.0  
##  1st Qu.: 821.8   1st Qu.: 821.8   1st Qu.: 40.75         1st Qu.: 530.8  
##  Median :1647.5   Median :1643.5   Median : 68.00         Median : 842.5  
##  Mean   :1979.8   Mean   :1978.9   Mean   :106.07         Mean   :1184.5  
##  3rd Qu.:2739.5   3rd Qu.:2739.5   3rd Qu.:120.00         3rd Qu.:1513.8  
##  Max.   :8468.0   Max.   :8461.0   Max.   :635.00         Max.   :5018.0  
##                                                                           
##   housingTotal  housingVacant_rent housingVacant_seasonal medianHomeValue 
##  Min.   : 498   Min.   :  0.0      Min.   :  27.0         Min.   :155600  
##  1st Qu.:1054   1st Qu.:  0.0      1st Qu.: 259.0         1st Qu.:178275  
##  Median :1826   Median :  0.0      Median : 520.5         Median :206800  
##  Mean   :2501   Mean   : 19.7      Mean   : 798.9         Mean   :226277  
##  3rd Qu.:3486   3rd Qu.: 13.5      3rd Qu.:1189.8         3rd Qu.:263650  
##  Max.   :9945   Max.   :208.0      Max.   :2999.0         Max.   :334000  
##                                                                           
##  medianGrossRent    unemplRate     LFparticipationRate
##  Min.   : 787.0   Min.   : 1.310   Min.   :45.93      
##  1st Qu.: 849.0   1st Qu.: 3.835   1st Qu.:57.72      
##  Median : 941.0   Median : 4.566   Median :63.42      
##  Mean   : 960.7   Mean   : 5.241   Mean   :62.30      
##  3rd Qu.:1027.0   3rd Qu.: 6.912   3rd Qu.:66.53      
##  Max.   :1223.0   Max.   :10.272   Max.   :74.84      
##  NA's   :1                                            
##  housing_percentOfseasonal housing_percentOfrent
##  Min.   : 1.477            Min.   :0.0000       
##  1st Qu.:18.398            1st Qu.:0.0000       
##  Median :29.462            Median :0.0000       
##  Mean   :30.980            Mean   :0.5421       
##  3rd Qu.:41.164            3rd Qu.:0.6900       
##  Max.   :68.020            Max.   :5.0510       
##                                                 
##  earnings_percenOfcurrentlabor popPov_percent   popBA_percent  
##  Min.   :100                   Min.   : 2.473   Min.   :10.61  
##  1st Qu.:100                   1st Qu.: 5.894   1st Qu.:15.94  
##  Median :100                   Median : 8.145   Median :24.35  
##  Mean   :100                   Mean   : 9.077   Mean   :23.81  
##  3rd Qu.:100                   3rd Qu.:10.869   3rd Qu.:29.02  
##  Max.   :100                   Max.   :21.167   Max.   :43.13  
##                                                                
##  housingVacant_seasonal_percent housingVacant_rent_percent
##  Min.   : 1.477                 Min.   :0.0000            
##  1st Qu.:18.398                 1st Qu.:0.0000            
##  Median :29.462                 Median :0.0000            
##  Mean   :30.980                 Mean   :0.5421            
##  3rd Qu.:41.164                 3rd Qu.:0.6900            
##  Max.   :68.020                 Max.   :5.0510            
## 

Suppose that you are writing a report on the economic importance of snowbirds in the Lakes Region Planning Commission’s Region. The director ask you what the share of seasonal homes in total (housing_percentOfseasonal) for a typical town in the region is.

Q1 Would you choose the mean or the median for the typical value?

I would choose the mean for the typical value. ## Q2 Explain your answer. I would choose the mean because the median is for extreme values because the mean can’t handle the extreme values, only the typical. ## Q3 What is the highest percentage of the seasonal homes by any town in the region? The highest percentage was about 65% of seasonal homes in a town. The average was just under 30%

# Create faceted histogram
ggplot(census, aes(x = housing_percentOfseasonal)) +
  geom_histogram() +
  facet_wrap(~ popTotal)


# Create box plots of city mpg by UR_aboveAve
ggplot(census, aes(x = 1, y = housing_percentOfseasonal)) +
  geom_boxplot()


# Create overlaid density plots for same data
ggplot(census, aes(x = housing_percentOfseasonal)) +
  geom_density(alpha = .3)

# If data has extreme values
census %>%
  group_by(housing_percentOfseasonal) %>%
  summarize(median = median(housing_percentOfseasonal, na.rm = TRUE),
            IQR = IQR(housing_percentOfseasonal, na.rm = TRUE))
## # A tibble: 30 x 3
##    housing_percentOfseasonal median   IQR
##                        <dbl>  <dbl> <dbl>
##  1                      1.48   1.48     0
##  2                      2.91   2.91     0
##  3                      8.03   8.03     0
##  4                     12.0   12.0      0
##  5                     12.1   12.1      0
##  6                     15.6   15.6      0
##  7                     16.2   16.2      0
##  8                     17.0   17.0      0
##  9                     22.6   22.6      0
## 10                     23.9   23.9      0
## # ... with 20 more rows

# If data doesn't have extreme values
census %>%
  group_by(housing_percentOfseasonal) %>%
  summarize(mean = mean(housing_percentOfseasonal, na.rm = TRUE),
            sd = sd(housing_percentOfseasonal, na.rm = TRUE))
## # A tibble: 30 x 3
##    housing_percentOfseasonal  mean    sd
##                        <dbl> <dbl> <dbl>
##  1                      1.48  1.48    NA
##  2                      2.91  2.91    NA
##  3                      8.03  8.03    NA
##  4                     12.0  12.0     NA
##  5                     12.1  12.1     NA
##  6                     15.6  15.6     NA
##  7                     16.2  16.2     NA
##  8                     17.0  17.0     NA
##  9                     22.6  22.6     NA
## 10                     23.9  23.9     NA
## # ... with 20 more rows

Suppose that director suspect that the share of seaonsal homes (popBA_percent) may be associated with the educational level of residents. Divide the towns into two groups: 1) educated towns (the share of population with Bachelor’s degree or higher than the average) and 2) other towns (the share of population with Bachelor’s degree or lower than the average). ## Q4 What is the share of seasonal homes in total in a typical educated town? About 42% ## Q5 What is the share of seasonal homes in total in a typical less educated educated town? About 18% ## Q6 What possible explanation you may have for the significant difference, if any? There are a significantly larger amount of seasonal homes in the more educated towns, most likey becuase the farther they went in school the more money they have to spend on seasonal homes. The towns with less seasonal homes probably don’t make as much money on average to buy seasonal homes.

# Create a new variable, UR > or < average
popBA <- mean(census$housing_percentOfseasonal)

census$popBA <- ifelse(census$housing_percentOfseasonal >= popBA, "equal or above ave", "below ave")

# Create box plots of total population by UR_aboveAve
ggplot(census, aes(x = popBA, y = housing_percentOfseasonal)) +
  geom_boxplot()


# If data has extreme values
census %>%
  group_by(popBA) %>%
  summarize(median = median(housing_percentOfseasonal, na.rm = TRUE),
            IQR = IQR(housing_percentOfseasonal, na.rm = TRUE))
## # A tibble: 2 x 3
##   popBA              median   IQR
##   <chr>               <dbl> <dbl>
## 1 below ave            17.0  14.2
## 2 equal or above ave   42.2  19.2

# If data doesn't have extreme values
census %>%
  group_by(popBA) %>%
  summarize(mean = mean(housing_percentOfseasonal, na.rm = TRUE),
            sd = sd(housing_percentOfseasonal, na.rm = TRUE))
## # A tibble: 2 x 3
##   popBA               mean    sd
##   <chr>              <dbl> <dbl>
## 1 below ave           17.7  8.97
## 2 equal or above ave  44.2 11.9