# Load packages
library(ggplot2)
library(dplyr)

# Import data
census <- read.csv("/resources/rstudio/BusStatistics/Data/census.csv")

str(census)
## 'data.frame':    30 obs. of  39 variables:
##  $ GEOID                         : num  3.3e+09 3.3e+09 3.3e+09 3.3e+09 3.3e+09 ...
##  $ NAME                          : Factor w/ 30 levels "Alexandria town, Grafton County, New Hampshire",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ popTotal                      : int  1876 5285 2658 2159 4631 7281 987 3050 1011 1294 ...
##  $ medianAge                     : num  45 45.8 46.5 37.8 41.8 43.8 57.6 41.4 51.5 44.6 ...
##  $ popNative_USA                 : int  1833 4975 2554 2094 4518 7223 966 3003 989 1268 ...
##  $ popNative                     : int  1835 5041 2573 2142 4533 7236 976 3026 989 1279 ...
##  $ popNaitve_NH                  : int  1008 2450 1418 1008 2273 4420 425 1611 384 659 ...
##  $ popMoved_otherState           : int  10 72 13 41 37 60 31 84 43 6 ...
##  $ popMoved_abroad               : int  5 0 13 0 0 11 0 9 0 0 ...
##  $ popCommute_car                : int  755 2298 1178 993 2537 3615 383 1381 524 696 ...
##  $ popCommute_publicT            : int  0 0 0 0 13 0 0 0 0 11 ...
##  $ popCommute_bicycle            : int  0 0 19 0 0 0 0 0 0 0 ...
##  $ popCommute_foot               : int  6 24 36 143 33 0 0 96 4 0 ...
##  $ popCommute_other              : int  14 12 5 8 0 54 0 6 3 3 ...
##  $ popCommute_home               : int  29 277 182 24 79 39 44 48 49 43 ...
##  $ popBA                         : int  199 1274 742 531 783 928 271 484 374 209 ...
##  $ popPov                        : int  159 305 176 457 234 620 45 323 79 32 ...
##  $ medianIncome                  : int  56597 76676 69489 44000 70037 60938 53958 54015 70625 61058 ...
##  $ earningsAgg                   : int  31933300 127223500 65678000 36025500 118527900 150122700 21898000 54045100 27997300 31278900 ...
##  $ earningsAgg_WageSalary        : int  29137500 118023200 57790900 34825100 112078200 142960400 19791000 51913800 23983000 26628900 ...
##  $ earningsAgg_SelfEmpl          : int  2795800 9200300 7887100 1200400 6449800 7162300 2106900 2131300 4014200 4650000 ...
##  $ LF                            : int  905 2775 1559 1232 2905 3904 459 1618 611 794 ...
##  $ LF_Civilian                   : int  905 2775 1559 1232 2905 3904 459 1610 611 794 ...
##  $ LF_Civilian_Unemployed        : int  68 120 129 59 180 174 21 40 31 28 ...
##  $ LF_Not                        : int  631 1526 689 590 992 2117 442 875 313 267 ...
##  $ housingTotal                  : int  960 4437 1251 1445 2415 3610 1033 2333 785 735 ...
##  $ housingVacant_rent            : int  0 38 0 28 0 83 0 0 0 0 ...
##  $ housingVacant_seasonal        : int  259 2123 151 326 618 585 529 1017 331 115 ...
##  $ medianHomeValue               : int  178800 279000 208000 155600 177500 186100 253200 205600 334000 165900 ...
##  $ medianGrossRent               : int  869 798 1125 834 1068 892 1027 824 1083 941 ...
##  $ unemplRate                    : num  7.51 4.32 8.27 4.79 6.2 ...
##  $ LFparticipationRate           : num  58.9 64.5 69.4 67.6 74.5 ...
##  $ housing_percentOfseasonal     : num  27 47.8 12.1 22.6 25.6 ...
##  $ housing_percentOfrent         : num  0 0.856 0 1.938 0 ...
##  $ earnings_percenOfcurrentlabor : num  100 100 100 100 100 ...
##  $ popPov_percent                : num  8.48 5.77 6.62 21.17 5.05 ...
##  $ popBA_percent                 : num  10.6 24.1 27.9 24.6 16.9 ...
##  $ housingVacant_seasonal_percent: num  27 47.8 12.1 22.6 25.6 ...
##  $ housingVacant_rent_percent    : num  0 0.856 0 1.938 0 ...

summary(census)
##      GEOID                                                       NAME   
##  Min.   :3.300e+09   Alexandria town, Grafton County, New Hampshire: 1  
##  1st Qu.:3.300e+09   Alton town, Belknap County, New Hampshire     : 1  
##  Median :3.300e+09   Andover town, Merrimack County, New Hampshire : 1  
##  Mean   :3.301e+09   Ashland town, Grafton County, New Hampshire   : 1  
##  3rd Qu.:3.301e+09   Barnstead town, Belknap County, New Hampshire : 1  
##  Max.   :3.301e+09   Belmont town, Belknap County, New Hampshire   : 1  
##                      (Other)                                       :24  
##     popTotal       medianAge     popNative_USA     popNative    
##  Min.   :  582   Min.   :37.80   Min.   :  550   Min.   :  557  
##  1st Qu.: 1483   1st Qu.:44.77   1st Qu.: 1452   1st Qu.: 1457  
##  Median : 2934   Median :48.75   Median : 2865   Median : 2889  
##  Mean   : 3774   Mean   :48.59   Mean   : 3645   Mean   : 3669  
##  3rd Qu.: 4774   3rd Qu.:53.00   3rd Qu.: 4576   3rd Qu.: 4615  
##  Max.   :16171   Max.   :59.10   Max.   :15511   Max.   :15613  
##                                                                 
##   popNaitve_NH    popMoved_otherState popMoved_abroad  popCommute_car  
##  Min.   : 202.0   Min.   :  6.00      Min.   : 0.000   Min.   : 234.0  
##  1st Qu.: 698.5   1st Qu.: 23.25      1st Qu.: 0.000   1st Qu.: 710.8  
##  Median :1316.0   Median : 42.00      Median : 0.000   Median :1397.5  
##  Mean   :1881.0   Mean   : 77.80      Mean   : 6.467   Mean   :1670.0  
##  3rd Qu.:2405.8   3rd Qu.: 87.25      3rd Qu.:10.500   3rd Qu.:2273.0  
##  Max.   :9378.0   Max.   :470.00      Max.   :49.000   Max.   :7069.0  
##                                                                        
##  popCommute_publicT popCommute_bicycle popCommute_foot popCommute_other
##  Min.   : 0.0       Min.   : 0.000     Min.   :  0.0   Min.   :  0.00  
##  1st Qu.: 0.0       1st Qu.: 0.000     1st Qu.:  6.5   1st Qu.:  3.50  
##  Median : 0.0       Median : 0.000     Median : 15.0   Median : 13.50  
##  Mean   : 3.9       Mean   : 2.567     Mean   : 34.6   Mean   : 35.47  
##  3rd Qu.: 0.0       3rd Qu.: 0.000     3rd Qu.: 34.0   3rd Qu.: 51.00  
##  Max.   :72.0       Max.   :27.000     Max.   :238.0   Max.   :225.00  
##                                                                        
##  popCommute_home      popBA            popPov        medianIncome  
##  Min.   : 21.00   Min.   : 159.0   Min.   :  30.0   Min.   :44000  
##  1st Qu.: 36.00   1st Qu.: 401.5   1st Qu.: 146.2   1st Qu.:52204  
##  Median : 58.50   Median : 651.0   Median : 215.0   Median :61438  
##  Mean   : 93.07   Mean   : 833.0   Mean   : 390.8   Mean   :59857  
##  3rd Qu.:134.00   3rd Qu.: 973.8   3rd Qu.: 440.5   3rd Qu.:68043  
##  Max.   :360.00   Max.   :2995.0   Max.   :2557.0   Max.   :76676  
##                                                                    
##   earningsAgg        earningsAgg_WageSalary earningsAgg_SelfEmpl
##  Min.   : 11992400   Min.   : 10745100      Min.   : 1200400    
##  1st Qu.: 31442500   1st Qu.: 27256050      1st Qu.: 3072075    
##  Median : 67989550   Median : 60202850      Median : 6436850    
##  Mean   : 84895040   Mean   : 77478727      Mean   : 7416153    
##  3rd Qu.:117084150   3rd Qu.:110519975      3rd Qu.: 9046525    
##  Max.   :353736200   Max.   :325047100      Max.   :28684400    
##                                                                 
##        LF          LF_Civilian     LF_Civilian_Unemployed     LF_Not      
##  Min.   : 288.0   Min.   : 288.0   Min.   : 17.00         Min.   : 233.0  
##  1st Qu.: 821.8   1st Qu.: 821.8   1st Qu.: 40.75         1st Qu.: 530.8  
##  Median :1647.5   Median :1643.5   Median : 68.00         Median : 842.5  
##  Mean   :1979.8   Mean   :1978.9   Mean   :106.07         Mean   :1184.5  
##  3rd Qu.:2739.5   3rd Qu.:2739.5   3rd Qu.:120.00         3rd Qu.:1513.8  
##  Max.   :8468.0   Max.   :8461.0   Max.   :635.00         Max.   :5018.0  
##                                                                           
##   housingTotal  housingVacant_rent housingVacant_seasonal medianHomeValue 
##  Min.   : 498   Min.   :  0.0      Min.   :  27.0         Min.   :155600  
##  1st Qu.:1054   1st Qu.:  0.0      1st Qu.: 259.0         1st Qu.:178275  
##  Median :1826   Median :  0.0      Median : 520.5         Median :206800  
##  Mean   :2501   Mean   : 19.7      Mean   : 798.9         Mean   :226277  
##  3rd Qu.:3486   3rd Qu.: 13.5      3rd Qu.:1189.8         3rd Qu.:263650  
##  Max.   :9945   Max.   :208.0      Max.   :2999.0         Max.   :334000  
##                                                                           
##  medianGrossRent    unemplRate     LFparticipationRate
##  Min.   : 787.0   Min.   : 1.310   Min.   :45.93      
##  1st Qu.: 849.0   1st Qu.: 3.835   1st Qu.:57.72      
##  Median : 941.0   Median : 4.566   Median :63.42      
##  Mean   : 960.7   Mean   : 5.241   Mean   :62.30      
##  3rd Qu.:1027.0   3rd Qu.: 6.912   3rd Qu.:66.53      
##  Max.   :1223.0   Max.   :10.272   Max.   :74.84      
##  NA's   :1                                            
##  housing_percentOfseasonal housing_percentOfrent
##  Min.   : 1.477            Min.   :0.0000       
##  1st Qu.:18.398            1st Qu.:0.0000       
##  Median :29.462            Median :0.0000       
##  Mean   :30.980            Mean   :0.5421       
##  3rd Qu.:41.164            3rd Qu.:0.6900       
##  Max.   :68.020            Max.   :5.0510       
##                                                 
##  earnings_percenOfcurrentlabor popPov_percent   popBA_percent  
##  Min.   :100                   Min.   : 2.473   Min.   :10.61  
##  1st Qu.:100                   1st Qu.: 5.894   1st Qu.:15.94  
##  Median :100                   Median : 8.145   Median :24.35  
##  Mean   :100                   Mean   : 9.077   Mean   :23.81  
##  3rd Qu.:100                   3rd Qu.:10.869   3rd Qu.:29.02  
##  Max.   :100                   Max.   :21.167   Max.   :43.13  
##                                                                
##  housingVacant_seasonal_percent housingVacant_rent_percent
##  Min.   : 1.477                 Min.   :0.0000            
##  1st Qu.:18.398                 1st Qu.:0.0000            
##  Median :29.462                 Median :0.0000            
##  Mean   :30.980                 Mean   :0.5421            
##  3rd Qu.:41.164                 3rd Qu.:0.6900            
##  Max.   :68.020                 Max.   :5.0510            
## 

Suppose that you are writing a report on the economic importance of snowbirds in the Lakes Region Planning Commission’s Region. The director ask you what the share of seasonal homes in total (housing_percentOfseasonal) for a typical town in the region is.

Q1 Would you choose the mean or the median for the typical value?

i would choose mean ## Q2 Explain your answer. Because there are no extreme values that would affect the data ## Q3 What is the highest percentage of the seasonal homes by any town in the region? the highest percentage is 68.020%

# Create faceted histogram
ggplot(census, aes(x = housing_percentOfseasonal)) +
  geom_histogram() 


# Create box plots of city mpg by UR_aboveAve
ggplot(census, aes(x = 1, y = housing_percentOfseasonal)) +
  geom_boxplot()


# Create overlaid density plots for same data
ggplot(census, aes(x = housing_percentOfseasonal)) +
  geom_density(alpha = .3)

# If data has extreme values
census %>%
  summarize(median = median(housing_percentOfseasonal, na.rm = TRUE),
            IQR = IQR(housing_percentOfseasonal, na.rm = TRUE))
##     median      IQR
## 1 29.46153 22.76686

# If data doesn't have extreme values
census %>%
  summarize(mean = mean(housing_percentOfseasonal, na.rm = TRUE),
            sd = sd(housing_percentOfseasonal, na.rm = TRUE))
##       mean       sd
## 1 30.97958 16.97828

Suppose that director suspect that the share of seaonsal homes (popBA_percent) may be associated with the educational level of residents. Divide the towns into two groups: 1) educated towns (the share of population with Bachelor’s degree or higher than the average) and 2) other towns (the share of population with Bachelor’s degree or lower than the average). ## Q4 What is the share of seasonal homes in total in a typical educated town? around 27 ## Q5 What is the share of seasonal homes in total in a typical less educated educated town? around 31 ## Q6 What possible explanation you may have for the significant difference, if any? i calculated the data using the medium. If i used the mean, the answers would have increased due to the outliers in percent.

# Create a new variable, UR > or < average
UR_ave <- mean(census$popBA_percent)

census$UR_aboveAve <- ifelse(census$popBA_percent >= UR_ave, "equal or above ave", "below ave")

# Create box plots of total population by UR_aboveAve
ggplot(census, aes(x = UR_aboveAve, y = housingTotal)) +
  geom_boxplot()


# If data has extreme values
census %>%
  group_by(UR_aboveAve) %>%
  summarize(median = median(housing_percentOfseasonal, na.rm = TRUE),
            IQR = IQR(housing_percentOfseasonal, na.rm = TRUE))
## # A tibble: 2 x 3
##   UR_aboveAve        median   IQR
##   <chr>               <dbl> <dbl>
## 1 below ave            24.8  14.5
## 2 equal or above ave   35.8  18.5

# If data doesn't have extreme values
census %>%
  group_by(UR_aboveAve) %>%
  summarize(mean = mean(housing_percentOfseasonal, na.rm = TRUE),
            sd = sd(housing_percentOfseasonal, na.rm = TRUE))
## # A tibble: 2 x 3
##   UR_aboveAve         mean    sd
##   <chr>              <dbl> <dbl>
## 1 below ave           23.7  17.4
## 2 equal or above ave  37.3  14.3