# Set up the environment: attach every package this analysis relies on.
# dplyr and readr are listed explicitly even though tidyverse attaches them
# too; attaching an already-attached package is harmless.
Packages <- c(
  "dplyr", "tidyverse", "treemap", "RColorBrewer",
  "highcharter", "readr", "plotly", "psych"
)

# lapply() collects the value of each library() call (the vector of attached
# packages at that moment) into a list, which would autoprint at top level as
# one search-path listing per package. invisible() suppresses that printout;
# the packages are still attached and startup messages are still shown.
invisible(lapply(Packages, library, character.only = TRUE))
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.2     ✓ stringr 1.4.0
## ✓ tidyr   1.1.3     ✓ forcats 0.5.1
## ✓ readr   1.4.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## Highcharts (www.highcharts.com) is a Highsoft software product which is
## not free for commercial and Governmental use
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## [[1]]
## [1] "dplyr"     "stats"     "graphics"  "grDevices" "utils"     "datasets" 
## [7] "methods"   "base"     
## 
## [[2]]
##  [1] "forcats"   "stringr"   "purrr"     "readr"     "tidyr"     "tibble"   
##  [7] "ggplot2"   "tidyverse" "dplyr"     "stats"     "graphics"  "grDevices"
## [13] "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "treemap"   "forcats"   "stringr"   "purrr"     "readr"     "tidyr"    
##  [7] "tibble"    "ggplot2"   "tidyverse" "dplyr"     "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[4]]
##  [1] "RColorBrewer" "treemap"      "forcats"      "stringr"      "purrr"       
##  [6] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [11] "dplyr"        "stats"        "graphics"     "grDevices"    "utils"       
## [16] "datasets"     "methods"      "base"        
## 
## [[5]]
##  [1] "highcharter"  "RColorBrewer" "treemap"      "forcats"      "stringr"     
##  [6] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [11] "tidyverse"    "dplyr"        "stats"        "graphics"     "grDevices"   
## [16] "utils"        "datasets"     "methods"      "base"        
## 
## [[6]]
##  [1] "highcharter"  "RColorBrewer" "treemap"      "forcats"      "stringr"     
##  [6] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [11] "tidyverse"    "dplyr"        "stats"        "graphics"     "grDevices"   
## [16] "utils"        "datasets"     "methods"      "base"        
## 
## [[7]]
##  [1] "plotly"       "highcharter"  "RColorBrewer" "treemap"      "forcats"     
##  [6] "stringr"      "purrr"        "readr"        "tidyr"        "tibble"      
## [11] "ggplot2"      "tidyverse"    "dplyr"        "stats"        "graphics"    
## [16] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[8]]
##  [1] "psych"        "plotly"       "highcharter"  "RColorBrewer" "treemap"     
##  [6] "forcats"      "stringr"      "purrr"        "readr"        "tidyr"       
## [11] "tibble"       "ggplot2"      "tidyverse"    "dplyr"        "stats"       
## [16] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [21] "base"
# Read the lab sample into `a`; readr reports the column types it guessed.
a <- read_csv(file = "Lab9sample.csv")
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   A_AGE = col_double(),
##   SEX_recode = col_double(),
##   PEARNVAL = col_double(),
##   Educ = col_double(),
##   Marital = col_double(),
##   EDUC_Less_than_HS = col_double(),
##   EDUC_HS = col_double(),
##   EDUC_Some_College = col_double(),
##   EDUC_BS = col_double(),
##   EDUC_Masters = col_double(),
##   EDUC_Prof_PhD = col_double(),
##   Married = col_double(),
##   Ex_married = col_double(),
##   Never_married = col_double(),
##   Log_earn = col_double()
## )
# Per-column summary (min, quartiles, mean, max) of the loaded data.
summary(object = a)
##      A_AGE        SEX_recode        PEARNVAL            Educ      
##  Min.   :18.0   Min.   :0.0000   Min.   :     52   Min.   :1.000  
##  1st Qu.:30.0   1st Qu.:0.0000   1st Qu.:  23250   1st Qu.:2.000  
##  Median :38.0   Median :0.0000   Median :  44000   Median :4.000  
##  Mean   :37.8   Mean   :0.4596   Mean   :  60806   Mean   :3.777  
##  3rd Qu.:46.0   3rd Qu.:1.0000   3rd Qu.:  72950   3rd Qu.:5.000  
##  Max.   :54.0   Max.   :1.0000   Max.   :1099999   Max.   :7.000  
##     Marital      EDUC_Less_than_HS    EDUC_HS       EDUC_Some_College
##  Min.   :0.000   Min.   :0.00000   Min.   :0.0000   Min.   :0.0000   
##  1st Qu.:0.000   1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.0000   
##  Median :2.000   Median :0.00000   Median :0.0000   Median :0.0000   
##  Mean   :1.237   Mean   :0.07506   Mean   :0.2298   Mean   :0.2748   
##  3rd Qu.:2.000   3rd Qu.:0.00000   3rd Qu.:0.0000   3rd Qu.:1.0000   
##  Max.   :2.000   Max.   :1.00000   Max.   :1.0000   Max.   :1.0000   
##     EDUC_BS        EDUC_Masters   EDUC_Prof_PhD        Married      
##  Min.   :0.0000   Min.   :0.000   Min.   :0.00000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.000   1st Qu.:0.00000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.000   Median :0.00000   Median :1.0000  
##  Mean   :0.2644   Mean   :0.112   Mean   :0.04388   Mean   :0.5647  
##  3rd Qu.:1.0000   3rd Qu.:0.000   3rd Qu.:0.00000   3rd Qu.:1.0000  
##  Max.   :1.0000   Max.   :1.000   Max.   :1.00000   Max.   :1.0000  
##    Ex_married     Never_married       Log_earn    
##  Min.   :0.0000   Min.   :0.0000   Min.   :1.716  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:4.366  
##  Median :0.0000   Median :0.0000   Median :4.643  
##  Mean   :0.1074   Mean   :0.3279   Mean   :4.585  
##  3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.:4.863  
##  Max.   :1.0000   Max.   :1.0000   Max.   :6.041
# Model 1: PEARNVAL regressed on age, sex, and the single coded Educ and
# Marital columns, each entered as one numeric predictor.
lab9_r <- lm(
  formula = PEARNVAL ~ A_AGE + SEX_recode + Educ + Marital,
  data = a
)
# Autoprint the call and the fitted coefficients.
lab9_r
## 
## Call:
## lm(formula = PEARNVAL ~ A_AGE + SEX_recode + Educ + Marital, 
##     data = a)
## 
## Coefficients:
## (Intercept)        A_AGE   SEX_recode         Educ      Marital  
##    -37125.0        521.7      18622.9      15405.5       9268.3
# Full regression table for model 1: residual quantiles, coefficient
# standard errors and t-tests, R-squared, and the overall F-test.
summary(object = lab9_r)
## 
## Call:
## lm(formula = PEARNVAL ~ A_AGE + SEX_recode + Educ + Marital, 
##     data = a)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
##  -96450  -29850   -8177   12268 1023301 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -37125.0    12016.4  -3.090 0.002069 ** 
## A_AGE          521.7      286.3   1.822 0.068731 .  
## SEX_recode   18622.9     5032.2   3.701 0.000229 ***
## Educ         15405.5     1509.9  10.203  < 2e-16 ***
## Marital       9268.3     3036.4   3.052 0.002340 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 72670 on 861 degrees of freedom
## Multiple R-squared:  0.144,  Adjusted R-squared:   0.14 
## F-statistic: 36.22 on 4 and 861 DF,  p-value: < 2.2e-16
# Model 2: same response as model 1, but education and marital status enter
# as separate 0/1 indicator columns instead of single coded columns.
# EDUC_HS and Never_married are not in the formula (presumably the reference
# categories for their groups; confirm against the lab instructions).
lab9_r1 <- lm(
  formula = PEARNVAL ~ A_AGE + SEX_recode +
    EDUC_Less_than_HS + EDUC_Some_College + EDUC_BS +
    EDUC_Masters + EDUC_Prof_PhD +
    Married + Ex_married,
  data = a
)

summary(object = lab9_r1)
## 
## Call:
## lm(formula = PEARNVAL ~ A_AGE + SEX_recode + EDUC_Less_than_HS + 
##     EDUC_Some_College + EDUC_BS + EDUC_Masters + EDUC_Prof_PhD + 
##     Married + Ex_married, data = a)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -113005  -27566   -8226   12411 1025209 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          995.1    11435.7   0.087 0.930677    
## A_AGE                473.2      292.6   1.618 0.106139    
## SEX_recode         18074.0     5075.7   3.561 0.000390 ***
## EDUC_Less_than_HS -15247.3    10379.6  -1.469 0.142209    
## EDUC_Some_College   5083.5     7049.2   0.721 0.471016    
## EDUC_BS            36534.7     7143.3   5.115 3.88e-07 ***
## EDUC_Masters       57650.1     9172.9   6.285 5.22e-10 ***
## EDUC_Prof_PhD      96642.9    12923.4   7.478 1.87e-13 ***
## Married            20697.9     6177.4   3.351 0.000842 ***
## Ex_married         12260.8     9325.1   1.315 0.188923    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 72370 on 856 degrees of freedom
## Multiple R-squared:  0.1559, Adjusted R-squared:  0.147 
## F-statistic: 17.57 on 9 and 856 DF,  p-value: < 2.2e-16
# Standard lm diagnostic plots for model 2.
plot(x = lab9_r1)

# Histogram of the raw PEARNVAL values used as the response above.
hist(x = a$PEARNVAL)

# Model 3: identical predictors to model 2, but the response is the
# Log_earn column instead of PEARNVAL.
lab9_r2 <- lm(
  formula = Log_earn ~ A_AGE + SEX_recode +
    EDUC_Less_than_HS + EDUC_Some_College + EDUC_BS +
    EDUC_Masters + EDUC_Prof_PhD +
    Married + Ex_married,
  data = a
)

summary(object = lab9_r2)
## 
## Call:
## lm(formula = Log_earn ~ A_AGE + SEX_recode + EDUC_Less_than_HS + 
##     EDUC_Some_College + EDUC_BS + EDUC_Masters + EDUC_Prof_PhD + 
##     Married + Ex_married, data = a)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.70100 -0.14246  0.06935  0.22939  1.38160 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        4.046655   0.063970  63.259  < 2e-16 ***
## A_AGE              0.004926   0.001637   3.010 0.002690 ** 
## SEX_recode         0.207730   0.028393   7.316 5.87e-13 ***
## EDUC_Less_than_HS -0.153656   0.058063  -2.646 0.008285 ** 
## EDUC_Some_College  0.104791   0.039433   2.657 0.008019 ** 
## EDUC_BS            0.306821   0.039959   7.678 4.40e-14 ***
## EDUC_Masters       0.411654   0.051313   8.022 3.40e-15 ***
## EDUC_Prof_PhD      0.573503   0.072292   7.933 6.67e-15 ***
## Married            0.133918   0.034556   3.875 0.000115 ***
## Ex_married         0.104527   0.052164   2.004 0.045404 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4048 on 856 degrees of freedom
## Multiple R-squared:  0.2374, Adjusted R-squared:  0.2294 
## F-statistic: 29.61 on 9 and 856 DF,  p-value: < 2.2e-16
# Standard lm diagnostic plots for model 3.
plot(x = lab9_r2)

# Histogram of the Log_earn values used as the response in model 3.
hist(x = a$Log_earn)