# Attach the packages listed below, one library() call per name.
# character.only = TRUE makes library() treat each element as a string
# holding the package name. The list returned by lapply() is auto-printed
# when this runs at top level.
Packages <- c(
  "dplyr", "tidyverse", "treemap", "RColorBrewer",
  "highcharter", "readr", "plotly", "psych"
)
lapply(Packages, library, character.only = TRUE)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.2 ✓ stringr 1.4.0
## ✓ tidyr 1.1.3 ✓ forcats 0.5.1
## ✓ readr 1.4.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Highcharts (www.highcharts.com) is a Highsoft software product which is
## not free for commercial and Governmental use
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
## [[1]]
## [1] "dplyr" "stats" "graphics" "grDevices" "utils" "datasets"
## [7] "methods" "base"
##
## [[2]]
## [1] "forcats" "stringr" "purrr" "readr" "tidyr" "tibble"
## [7] "ggplot2" "tidyverse" "dplyr" "stats" "graphics" "grDevices"
## [13] "utils" "datasets" "methods" "base"
##
## [[3]]
## [1] "treemap" "forcats" "stringr" "purrr" "readr" "tidyr"
## [7] "tibble" "ggplot2" "tidyverse" "dplyr" "stats" "graphics"
## [13] "grDevices" "utils" "datasets" "methods" "base"
##
## [[4]]
## [1] "RColorBrewer" "treemap" "forcats" "stringr" "purrr"
## [6] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [11] "dplyr" "stats" "graphics" "grDevices" "utils"
## [16] "datasets" "methods" "base"
##
## [[5]]
## [1] "highcharter" "RColorBrewer" "treemap" "forcats" "stringr"
## [6] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [11] "tidyverse" "dplyr" "stats" "graphics" "grDevices"
## [16] "utils" "datasets" "methods" "base"
##
## [[6]]
## [1] "highcharter" "RColorBrewer" "treemap" "forcats" "stringr"
## [6] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [11] "tidyverse" "dplyr" "stats" "graphics" "grDevices"
## [16] "utils" "datasets" "methods" "base"
##
## [[7]]
## [1] "plotly" "highcharter" "RColorBrewer" "treemap" "forcats"
## [6] "stringr" "purrr" "readr" "tidyr" "tibble"
## [11] "ggplot2" "tidyverse" "dplyr" "stats" "graphics"
## [16] "grDevices" "utils" "datasets" "methods" "base"
##
## [[8]]
## [1] "psych" "plotly" "highcharter" "RColorBrewer" "treemap"
## [6] "forcats" "stringr" "purrr" "readr" "tidyr"
## [11] "tibble" "ggplot2" "tidyverse" "dplyr" "stats"
## [16] "graphics" "grDevices" "utils" "datasets" "methods"
## [21] "base"
# Load the lab data set from the working directory. No col_types are given,
# so readr guesses the column types and reports the specification it chose.
a <- read_csv(file = "Lab9sample.csv")
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## A_AGE = col_double(),
## SEX_recode = col_double(),
## PEARNVAL = col_double(),
## Educ = col_double(),
## Marital = col_double(),
## EDUC_Less_than_HS = col_double(),
## EDUC_HS = col_double(),
## EDUC_Some_College = col_double(),
## EDUC_BS = col_double(),
## EDUC_Masters = col_double(),
## EDUC_Prof_PhD = col_double(),
## Married = col_double(),
## Ex_married = col_double(),
## Never_married = col_double(),
## Log_earn = col_double()
## )
# Per-column summary statistics (min, quartiles, mean, max) for the loaded
# data; the result is auto-printed because the call is at top level.
summary(a)
## A_AGE SEX_recode PEARNVAL Educ
## Min. :18.0 Min. :0.0000 Min. : 52 Min. :1.000
## 1st Qu.:30.0 1st Qu.:0.0000 1st Qu.: 23250 1st Qu.:2.000
## Median :38.0 Median :0.0000 Median : 44000 Median :4.000
## Mean :37.8 Mean :0.4596 Mean : 60806 Mean :3.777
## 3rd Qu.:46.0 3rd Qu.:1.0000 3rd Qu.: 72950 3rd Qu.:5.000
## Max. :54.0 Max. :1.0000 Max. :1099999 Max. :7.000
## Marital EDUC_Less_than_HS EDUC_HS EDUC_Some_College
## Min. :0.000 Min. :0.00000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :2.000 Median :0.00000 Median :0.0000 Median :0.0000
## Mean :1.237 Mean :0.07506 Mean :0.2298 Mean :0.2748
## 3rd Qu.:2.000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:1.0000
## Max. :2.000 Max. :1.00000 Max. :1.0000 Max. :1.0000
## EDUC_BS EDUC_Masters EDUC_Prof_PhD Married
## Min. :0.0000 Min. :0.000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.0000 Median :0.000 Median :0.00000 Median :1.0000
## Mean :0.2644 Mean :0.112 Mean :0.04388 Mean :0.5647
## 3rd Qu.:1.0000 3rd Qu.:0.000 3rd Qu.:0.00000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.000 Max. :1.00000 Max. :1.0000
## Ex_married Never_married Log_earn
## Min. :0.0000 Min. :0.0000 Min. :1.716
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:4.366
## Median :0.0000 Median :0.0000 Median :4.643
## Mean :0.1074 Mean :0.3279 Mean :4.585
## 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:4.863
## Max. :1.0000 Max. :1.0000 Max. :6.041
# Baseline OLS model of earnings (PEARNVAL) on age, sex, education and
# marital status. Educ and Marital enter as single numeric codes here, so
# each gets one slope coefficient rather than one per category.
lab9_r <- lm(
  formula = PEARNVAL ~ A_AGE + SEX_recode + Educ + Marital,
  data = a
)
# Print the call and the fitted coefficients.
lab9_r
##
## Call:
## lm(formula = PEARNVAL ~ A_AGE + SEX_recode + Educ + Marital,
## data = a)
##
## Coefficients:
## (Intercept) A_AGE SEX_recode Educ Marital
## -37125.0 521.7 18622.9 15405.5 9268.3
# Full regression report for the baseline model: residual quantiles,
# coefficient table (estimate, std. error, t, p), residual standard error,
# R-squared and the overall F-test.
summary(lab9_r)
##
## Call:
## lm(formula = PEARNVAL ~ A_AGE + SEX_recode + Educ + Marital,
## data = a)
##
## Residuals:
## Min 1Q Median 3Q Max
## -96450 -29850 -8177 12268 1023301
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -37125.0 12016.4 -3.090 0.002069 **
## A_AGE 521.7 286.3 1.822 0.068731 .
## SEX_recode 18622.9 5032.2 3.701 0.000229 ***
## Educ 15405.5 1509.9 10.203 < 2e-16 ***
## Marital 9268.3 3036.4 3.052 0.002340 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 72670 on 861 degrees of freedom
## Multiple R-squared: 0.144, Adjusted R-squared: 0.14
## F-statistic: 36.22 on 4 and 861 DF, p-value: < 2.2e-16
# Second model: same outcome (PEARNVAL), but education and marital status
# enter as 0/1 indicator columns instead of numeric codes. EDUC_HS and
# Never_married are left out of the formula, so the remaining indicator
# coefficients are read relative to those two categories.
lab9_r1 <- lm(
  formula = PEARNVAL ~ A_AGE + SEX_recode +
    EDUC_Less_than_HS + EDUC_Some_College + EDUC_BS +
    EDUC_Masters + EDUC_Prof_PhD +
    Married + Ex_married,
  data = a
)
# Coefficient table and fit statistics for the indicator-variable model.
summary(lab9_r1)
##
## Call:
## lm(formula = PEARNVAL ~ A_AGE + SEX_recode + EDUC_Less_than_HS +
## EDUC_Some_College + EDUC_BS + EDUC_Masters + EDUC_Prof_PhD +
## Married + Ex_married, data = a)
##
## Residuals:
## Min 1Q Median 3Q Max
## -113005 -27566 -8226 12411 1025209
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 995.1 11435.7 0.087 0.930677
## A_AGE 473.2 292.6 1.618 0.106139
## SEX_recode 18074.0 5075.7 3.561 0.000390 ***
## EDUC_Less_than_HS -15247.3 10379.6 -1.469 0.142209
## EDUC_Some_College 5083.5 7049.2 0.721 0.471016
## EDUC_BS 36534.7 7143.3 5.115 3.88e-07 ***
## EDUC_Masters 57650.1 9172.9 6.285 5.22e-10 ***
## EDUC_Prof_PhD 96642.9 12923.4 7.478 1.87e-13 ***
## Married 20697.9 6177.4 3.351 0.000842 ***
## Ex_married 12260.8 9325.1 1.315 0.188923
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 72370 on 856 degrees of freedom
## Multiple R-squared: 0.1559, Adjusted R-squared: 0.147
## F-statistic: 17.57 on 9 and 856 DF, p-value: < 2.2e-16
# Regression diagnostic plots for lab9_r1. plot() on an lm fit draws the
# standard residual diagnostics (e.g. residuals vs fitted, normal Q-Q).
plot(lab9_r1)




# Histogram of raw earnings. The summary of `a` shows a mean well above the
# median and a very large maximum, i.e. a long right tail.
hist(a$PEARNVAL)

# Third model: identical right-hand side to lab9_r1, but the outcome is
# Log_earn instead of PEARNVAL, so coefficients are on the log-earnings
# scale.
# NOTE(review): Log_earn looks like log10(PEARNVAL) (its min/max in the data
# summary match log10 of the PEARNVAL min/max) -- confirm with the data
# source.
lab9_r2 <- lm(
  formula = Log_earn ~ A_AGE + SEX_recode +
    EDUC_Less_than_HS + EDUC_Some_College + EDUC_BS +
    EDUC_Masters + EDUC_Prof_PhD +
    Married + Ex_married,
  data = a
)
# Coefficient table and fit statistics for the log-earnings model.
summary(lab9_r2)
##
## Call:
## lm(formula = Log_earn ~ A_AGE + SEX_recode + EDUC_Less_than_HS +
## EDUC_Some_College + EDUC_BS + EDUC_Masters + EDUC_Prof_PhD +
## Married + Ex_married, data = a)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.70100 -0.14246 0.06935 0.22939 1.38160
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.046655 0.063970 63.259 < 2e-16 ***
## A_AGE 0.004926 0.001637 3.010 0.002690 **
## SEX_recode 0.207730 0.028393 7.316 5.87e-13 ***
## EDUC_Less_than_HS -0.153656 0.058063 -2.646 0.008285 **
## EDUC_Some_College 0.104791 0.039433 2.657 0.008019 **
## EDUC_BS 0.306821 0.039959 7.678 4.40e-14 ***
## EDUC_Masters 0.411654 0.051313 8.022 3.40e-15 ***
## EDUC_Prof_PhD 0.573503 0.072292 7.933 6.67e-15 ***
## Married 0.133918 0.034556 3.875 0.000115 ***
## Ex_married 0.104527 0.052164 2.004 0.045404 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4048 on 856 degrees of freedom
## Multiple R-squared: 0.2374, Adjusted R-squared: 0.2294
## F-statistic: 29.61 on 9 and 856 DF, p-value: < 2.2e-16
# Regression diagnostic plots for the log-earnings model lab9_r2, for
# comparison with the same plots drawn for lab9_r1.
plot(lab9_r2)




# Histogram of the log-transformed earnings used as the outcome in lab9_r2,
# for comparison with the histogram of raw PEARNVAL.
hist(a$Log_earn)
