# List the working directory so the exact workbook file name can be confirmed.
list.files(path = ".")
## [1] "Database Philadelphia-5 (1).xlsx" "LIA R Analysis.Rmd"              
## [3] "LIA-R-Analysis.Rmd"
# Import the member workbook into `lia`. The reader reports below that it
# auto-renames one blank header and two duplicated `Phone:` headers.
lia <- readxl::read_excel(path = "Database Philadelphia-5 (1).xlsx")
## New names:
## • `` -> `...32`
## • `Phone:` -> `Phone:...37`
## • `Phone:` -> `Phone:...38`
# Preview the first six rows to sanity-check the imported column types.
head(x = lia, n = 6L)
## # A tibble: 6 × 39
##   Member Gender   Age Height Weight Ethnic Religious Active Marital
##    <dbl> <chr>  <dbl>  <dbl>  <dbl> <chr>  <chr>     <chr>  <chr>  
## 1 100357 F         29     69    165 NA     P         N      N      
## 2 100365 M         29     72    212 NA     O         Y      N      
## 3 100377 F         25     69    183 NA     O         Y      N      
## 4 100411 M         57     69    182 AA     O         Y      S      
## 5 100420 M         46     68    185 NA     C         N      W      
## 6 100422 F         37     69    160 NA     C         Y      D      
## # ℹ 30 more variables: `Number of Pets` <dbl>, Education <chr>,
## #   Occupation <chr>, Income <dbl>, Political <chr>, Health <chr>,
## #   Disabilities <chr>, Smoking <chr>, Alcohol <chr>, Communicable <chr>,
## #   Criminal <chr>, Bicycling <chr>, Cooking <chr>, Dancing <chr>,
## #   Gardening <chr>, Outdoor <chr>, Reading <chr>, Sports <chr>, Scuba <chr>,
## #   Theater <chr>, Travel <chr>, `Name:` <chr>, ...32 <chr>,
## #   `Street Address` <chr>, City <chr>, State <chr>, Zip <dbl>, …
# Full list of column names, including the auto-repaired ones.
colnames(lia)
##  [1] "Member"         "Gender"         "Age"            "Height"        
##  [5] "Weight"         "Ethnic"         "Religious"      "Active"        
##  [9] "Marital"        "Number of Pets" "Education"      "Occupation"    
## [13] "Income"         "Political"      "Health"         "Disabilities"  
## [17] "Smoking"        "Alcohol"        "Communicable"   "Criminal"      
## [21] "Bicycling"      "Cooking"        "Dancing"        "Gardening"     
## [25] "Outdoor"        "Reading"        "Sports"         "Scuba"         
## [29] "Theater"        "Travel"         "Name:"          "...32"         
## [33] "Street Address" "City"           "State"          "Zip"           
## [37] "Phone:...37"    "Phone:...38"    "Secure Inc."
# Min, quartiles, median, mean and max of member income.
summary(lia[["Income"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -204800   30933   43200   60126   60600 3725148
# Standard deviation of income, ignoring any missing values.
sd(x = lia[["Income"]], na.rm = TRUE)
## [1] 138120.7
# Number of missing income values.
sum(is.na(lia[["Income"]]))
## [1] 0
# Quartiles printed at full precision (summary() above shows rounded values).
quantile(x = lia[["Income"]], probs = c(0.25, 0.50, 0.75), na.rm = TRUE)
##      25%      50%      75% 
## 30933.33 43200.00 60600.00
# Histogram of income over its entire range, divided into 30 bins.
ggplot(data = lia) +
  geom_histogram(mapping = aes(x = Income), bins = 30)

# Histogram of income zoomed to the 0-200,000 range, in 10,000-wide bins.
# The summary above shows income running from -204800 to 3725148, so the
# full-range histogram is dominated by a few extreme values.
ggplot(lia, aes(x = Income)) +
  # boundary = 0 puts bin edges on multiples of 10,000. Without it the bins
  # are centred on those multiples, so one bin straddles 0 and is cut in half
  # by the zoom window below.
  geom_histogram(binwidth = 10000, boundary = 0) +
  # coord_cartesian() only zooms the view; rows outside the limits are still
  # binned rather than dropped.
  coord_cartesian(xlim = c(0, 200000))

# Single boxplot of income to make the outlying values visible.
ggplot(data = lia) +
  geom_boxplot(mapping = aes(y = Income))

# Ten members with the lowest recorded income, with identifying columns.
head(
  lia[
    order(lia$Income, decreasing = FALSE),
    c("Member", "Age", "Occupation", "Education", "Income")
  ],
  n = 10
)
## # A tibble: 10 × 5
##    Member   Age Occupation Education   Income
##     <dbl> <dbl> <chr>      <chr>        <dbl>
##  1 106134    58 Prof       A         -204800 
##  2 108741    53 Constr     B          -61700 
##  3 103139    29 Labor      B          -29733.
##  4 104538    29 Cler       H            9100 
##  5 108516    26 AF         S            9900 
##  6 105249    27 Cler       S           10300 
##  7 107813    28 Serv       B           11100 
##  8 104150    29 AF         S           12000 
##  9 102850    26 Trans      B           12300 
## 10 103767    28 Mech       H           12500
# Ten members with the highest recorded income, with identifying columns.
head(
  lia[
    order(lia$Income, decreasing = TRUE),
    c("Member", "Age", "Occupation", "Education", "Income")
  ],
  n = 10
)
## # A tibble: 10 × 5
##    Member   Age Occupation Education  Income
##     <dbl> <dbl> <chr>      <chr>       <dbl>
##  1 101963    45 M&A        B         3725148
##  2 105966    34 Prof       A         1180040
##  3 106158    49 Ed         A          999999
##  4 104760    59 Prof       A          985900
##  5 109486    82 Mgmt       S          800000
##  6 103458    59 Ed         A          780000
##  7 105329    45 Prof       A          565000
##  8 107664    45 Prof       B          464200
##  9 103607    49 Labor      H          319100
## 10 103247    57 Prof       A          234400