## Descriptive Statistics ~
#Load Libraries
library(wooldridge)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(psych)
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
data(bwght)
?bwght
## starting httpd help server ... done
head(bwght)
## faminc cigtax cigprice bwght fatheduc motheduc parity male white cigs
## 1 13.5 16.5 122.3 109 12 12 1 1 1 0
## 2 7.5 16.5 122.3 133 6 12 2 1 0 0
## 3 0.5 16.5 122.3 129 NA 12 2 0 0 0
## 4 15.5 16.5 122.3 126 12 12 2 1 0 0
## 5 27.5 16.5 122.3 134 14 12 2 1 1 0
## 6 7.5 16.5 122.3 118 12 14 6 1 0 0
## lbwght bwghtlbs packs lfaminc
## 1 4.691348 6.8125 0 2.6026897
## 2 4.890349 8.3125 0 2.0149031
## 3 4.859812 8.0625 0 -0.6931472
## 4 4.836282 7.8750 0 2.7408400
## 5 4.897840 8.3750 0 3.3141861
## 6 4.770685 7.3750 0 2.0149031
bwght_data <- bwght %>%
select(motheduc, faminc, cigs, parity)
describe(bwght_data)
## vars n mean sd median trimmed mad min max range skew
## motheduc 1 1387 12.94 2.38 12.0 12.95 1.48 2.0 18 16.0 -0.03
## faminc 2 1388 29.03 18.74 27.5 27.72 17.79 0.5 65 64.5 0.62
## cigs 3 1388 2.09 5.97 0.0 0.39 0.00 0.0 50 50.0 3.56
## parity 4 1388 1.63 0.89 1.0 1.47 0.00 1.0 6 5.0 1.63
## kurtosis se
## motheduc 0.64 0.06
## faminc -0.53 0.50
## cigs 14.91 0.16
## parity 2.93 0.02
stargazer(bwght_data, type = "text", title = "Descriptive statistics", digits = 2)
##
## Descriptive statistics
## =========================================
## Statistic N Mean St. Dev. Min Max
## -----------------------------------------
## motheduc 1,387 12.94 2.38 2 18
## faminc 1,388 29.03 18.74 0.50 65.00
## cigs 1,388 2.09 5.97 0 50
## parity 1,388 1.63 0.89 1 6
## -----------------------------------------
# this is a data based on descriptive statistics. This gives- mean, median, standard deviation, min/max and kurtosis.
# The data shows that most of the mothers have completed high school with an average income of $29,000 per year. Cigarette used during pregnancy is generally low. Average parity is 1.63, showing most mothers are first or second time parents. It shows how mother education, income, health behavior and reproductive history influences maternal and child outcome.
## Histogram and scatterplot
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
library(wooldridge)
#load the data set
data(wage1)
head(wage1)
## wage educ exper tenure nonwhite female married numdep smsa northcen south
## 1 3.10 11 2 0 0 1 0 2 1 0 0
## 2 3.24 12 22 2 0 1 1 3 1 0 0
## 3 3.00 11 2 0 0 0 0 2 0 0 0
## 4 6.00 8 44 28 0 0 1 0 1 0 0
## 5 5.30 12 7 2 0 0 1 1 0 0 0
## 6 8.75 16 9 8 0 0 1 0 1 0 0
## west construc ndurman trcommpu trade services profserv profocc clerocc
## 1 1 0 0 0 0 0 0 0 0
## 2 1 0 0 0 0 1 0 0 0
## 3 1 0 0 0 1 0 0 0 0
## 4 1 0 0 0 0 0 0 0 1
## 5 1 0 0 0 0 0 0 0 0
## 6 1 0 0 0 0 0 1 1 0
## servocc lwage expersq tenursq
## 1 0 1.131402 4 0
## 2 1 1.175573 484 4
## 3 0 1.098612 4 0
## 4 0 1.791759 1936 784
## 5 0 1.667707 49 4
## 6 0 2.169054 81 64
ggplot(wage1, aes(x = wage)) + geom_histogram(binwidth = 1, fill = "plum", color = "blue") +
labs(title = "histogram of wages",
x = "wages ($ per hour)",
y = "Frequency") +
theme_minimal()

#this histogram shows how wages are distributed. Y-axis shows how many individuals earn wages within each bin range. As it shows right skewed, more people earn lower wages and very less people earn high wages.
ggplot(wage1, aes(x = educ, y = wage)) +
geom_point(color = "darkgreen", alpha = 0.6) +
geom_smooth(method = "lm", se = TRUE, color = "red") +
labs(title = "wage vs education",
x = "years of eductaion",
y = "wage ($ per hour)") +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# the above scatterpot shows a positive slop, indicating a positive relationship between education and higher wages
ggplot(wage1, aes(x = exper, y = wage)) +
geom_point(colour = "Purple", alpha = 0.6) +
geom_smooth(method = "loess", se = TRUE, color = "blue") +
labs(title = "wage vs experience",
x = "years of experience",
y = "wage ($ per hour)") +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# This scatterplot shows wage trends along with work experience
# blue - loess curve, indicates a non linear pattern where the wages rise early in career
## Bivariate regression ~
#install.packages("wooldgridge")
library(wooldridge)
data("wage1")
head(wage1)
## wage educ exper tenure nonwhite female married numdep smsa northcen south
## 1 3.10 11 2 0 0 1 0 2 1 0 0
## 2 3.24 12 22 2 0 1 1 3 1 0 0
## 3 3.00 11 2 0 0 0 0 2 0 0 0
## 4 6.00 8 44 28 0 0 1 0 1 0 0
## 5 5.30 12 7 2 0 0 1 1 0 0 0
## 6 8.75 16 9 8 0 0 1 0 1 0 0
## west construc ndurman trcommpu trade services profserv profocc clerocc
## 1 1 0 0 0 0 0 0 0 0
## 2 1 0 0 0 0 1 0 0 0
## 3 1 0 0 0 1 0 0 0 0
## 4 1 0 0 0 0 0 0 0 1
## 5 1 0 0 0 0 0 0 0 0
## 6 1 0 0 0 0 0 1 1 0
## servocc lwage expersq tenursq
## 1 0 1.131402 4 0
## 2 1 1.175573 484 4
## 3 0 1.098612 4 0
## 4 0 1.791759 1936 784
## 5 0 1.667707 49 4
## 6 0 2.169054 81 64
model1 <-lm(wage ~ educ, data = wage1)
summary(model1)
##
## Call:
## lm(formula = wage ~ educ, data = wage1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.3396 -2.1501 -0.9674 1.1921 16.6085
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.90485 0.68497 -1.321 0.187
## educ 0.54136 0.05325 10.167 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.378 on 524 degrees of freedom
## Multiple R-squared: 0.1648, Adjusted R-squared: 0.1632
## F-statistic: 103.4 on 1 and 524 DF, p-value: < 2.2e-16
coef(summary(model1))
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.9048516 0.68496782 -1.321013 1.870735e-01
## educ 0.5413593 0.05324804 10.166746 2.782599e-22
summary(model1)$r.squared
## [1] 0.1647575
model2 <- lm(wage ~ exper, data = wage1)
summary(model2)
##
## Call:
## lm(formula = wage ~ exper, data = wage1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.936 -2.458 -1.112 1.077 18.716
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.37331 0.25699 20.908 < 2e-16 ***
## exper 0.03072 0.01181 2.601 0.00955 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.673 on 524 degrees of freedom
## Multiple R-squared: 0.01275, Adjusted R-squared: 0.01086
## F-statistic: 6.766 on 1 and 524 DF, p-value: 0.009555
stargazer(model1, model2, type = "text",
title = "Bivariate Regression results",
column.labels = c("Educ vs wage", "Exper vs wage"),
dep.var.labels = "wage",
covariate.labels =
c("Education", "Experience"),
align = TRUE)
##
## Bivariate Regression results
## ===========================================================
## Dependent variable:
## ----------------------------
## wage
## Educ vs wage Exper vs wage
## (1) (2)
## -----------------------------------------------------------
## Education 0.541***
## (0.053)
##
## Experience 0.031***
## (0.012)
##
## Constant -0.905 5.373***
## (0.685) (0.257)
##
## -----------------------------------------------------------
## Observations 526 526
## R2 0.165 0.013
## Adjusted R2 0.163 0.011
## Residual Std. Error (df = 524) 3.378 3.673
## F Statistic (df = 1; 524) 103.363*** 6.766***
## ===========================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# Both education and experience affect wages significantly. However, education has a stronger impact explaining more variation in hourly wages than experience alone.
##Multivariate regression ~
library(wooldridge)
library(stargazer)
library(dplyr)
library(ggplot2)
data("wage1")
head(wage1)
## wage educ exper tenure nonwhite female married numdep smsa northcen south
## 1 3.10 11 2 0 0 1 0 2 1 0 0
## 2 3.24 12 22 2 0 1 1 3 1 0 0
## 3 3.00 11 2 0 0 0 0 2 0 0 0
## 4 6.00 8 44 28 0 0 1 0 1 0 0
## 5 5.30 12 7 2 0 0 1 1 0 0 0
## 6 8.75 16 9 8 0 0 1 0 1 0 0
## west construc ndurman trcommpu trade services profserv profocc clerocc
## 1 1 0 0 0 0 0 0 0 0
## 2 1 0 0 0 0 1 0 0 0
## 3 1 0 0 0 1 0 0 0 0
## 4 1 0 0 0 0 0 0 0 1
## 5 1 0 0 0 0 0 0 0 0
## 6 1 0 0 0 0 0 1 1 0
## servocc lwage expersq tenursq
## 1 0 1.131402 4 0
## 2 1 1.175573 484 4
## 3 0 1.098612 4 0
## 4 0 1.791759 1936 784
## 5 0 1.667707 49 4
## 6 0 2.169054 81 64
model_multi <- lm(wage ~ educ + exper + tenure, data = wage1)
summary(model_multi)
##
## Call:
## lm(formula = wage ~ educ + exper + tenure, data = wage1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.6068 -1.7747 -0.6279 1.1969 14.6536
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.87273 0.72896 -3.941 9.22e-05 ***
## educ 0.59897 0.05128 11.679 < 2e-16 ***
## exper 0.02234 0.01206 1.853 0.0645 .
## tenure 0.16927 0.02164 7.820 2.93e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.084 on 522 degrees of freedom
## Multiple R-squared: 0.3064, Adjusted R-squared: 0.3024
## F-statistic: 76.87 on 3 and 522 DF, p-value: < 2.2e-16
stargazer(model_multi, type = "text",
title = "Multivariate regression : Determinants of wage",
dep.var.labels = "wage",
covariate.labels =
c("Education", "Experience", "Tenure"),
report = "vcstp",
digits = 3)
##
## Multivariate regression : Determinants of wage
## ===============================================
## Dependent variable:
## ---------------------------
## wage
## -----------------------------------------------
## Education 0.599
## (0.051)
## t = 11.679
## p = 0.000
##
## Experience 0.022
## (0.012)
## t = 1.853
## p = 0.065
##
## Tenure 0.169
## (0.022)
## t = 7.820
## p = 0.000
##
## Constant -2.873
## (0.729)
## t = -3.941
## p = 0.0001
##
## -----------------------------------------------
## Observations 526
## R2 0.306
## Adjusted R2 0.302
## Residual Std. Error 3.084 (df = 522)
## F Statistic 76.873*** (df = 3; 522)
## ===============================================
## Note: *p<0.1; **p<0.05; ***p<0.01
#Education has strongest positive effect as each year it increases the age by $0.59, showing highly significant result
#tenure significantly increases wage by about 0.16
# experience has smaller impact