# Load the data
URL <- "https://raw.githubusercontent.com/DS4PS/cpp-523-fall-2019/master/labs/data/IncomeHappiness.csv"
dat <- read.csv(URL)
# Fit the quadratic regression model
model <- lm(happiness ~ income + I(income^2), data = dat)
# Predict y_hat using the model
dat$y_hat <- predict(model, newdata = dat)
# Plot the data and the fitted quadratic regression line
plot(dat$income, dat$happiness,
xlab="Income (Thousands of Dollars)", ylab="Happiness Scale",
main="Does Money Make You Happy?",
pch=19, col="darkorange", bty="n",
xaxt="n")
axis(side=1, at=c(0,50000,100000,150000,200000), labels=c("$0","$50k","$100k","$150k","$200k"))
lines(dat$income, dat$y_hat, col=gray(0.3,0.5), lwd=6)
Questions
\(Happiness = b_0+b_1 Income+ b_2 (Income)^2+e\)
y.happy <- dat$happiness
x.income <- dat$income
x2.income.sq <- x.income*x.income
options( scipen=8 )
summary( lm( y.happy ~ x.income + x2.income.sq ) )
##
## Call:
## lm(formula = y.happy ~ x.income + x2.income.sq)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.1420 -3.9703 -0.0493 3.9720 20.4357
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 35.34826872249546 0.36139931707627 97.81 <2e-16 ***
## x.income 0.00073610233669 0.00000887024309 82.99 <2e-16 ***
## x2.income.sq -0.00000000251607 0.00000000004385 -57.38 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.806 on 1997 degrees of freedom
## Multiple R-squared: 0.8829, Adjusted R-squared: 0.8828
## F-statistic: 7529 on 2 and 1997 DF, p-value: < 2.2e-16
# divide income by 10,000
y.happy <- dat$happiness
x.income <- dat$income / 10000
x2.income.sq <- x.income*x.income
summary( lm( y.happy ~ x.income + x2.income.sq ) )
##
## Call:
## lm(formula = y.happy ~ x.income + x2.income.sq)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.1420 -3.9703 -0.0493 3.9720 20.4357
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 35.348269 0.361399 97.81 <2e-16 ***
## x.income 7.361023 0.088702 82.99 <2e-16 ***
## x2.income.sq -0.251607 0.004385 -57.38 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.806 on 1997 degrees of freedom
## Multiple R-squared: 0.8829, Adjusted R-squared: 0.8828
## F-statistic: 7529 on 2 and 1997 DF, p-value: < 2.2e-16
# replace with model coefficients
b0 <- 35.348269
b1 <- 7.361023
b2 <- -0.251607
x <- 15 # use 15000 if you did not rescale above
happy.15k <- b0 + b1*x + b2*x*x
x <- 25 # use 25000 if you did not rescale above
happy.25k <- b0 + b1*x + b2*x*x
happy.25k - happy.15k # marginal effect of $10k increase at $15k starting salary
## [1] -27.03257
# replace with model coefficients
b0 <- 35.348269
b1 <- 7.361023
b2 <- -0.251607
x <- 75 # use 15000 if you did not rescale above
happy.75k <- b0 + b1*x + b2*x*x
x <- 85 # use 25000 if you did not rescale above
happy.85k <- b0 + b1*x + b2*x*x
happy.85k - happy.75k # marginal effect of $10k increase at $75k starting salary
## [1] -328.961
# replace with model coefficients
b0 <- 35.348269
b1 <- 7.361023
b2 <- -0.251607
x <- 100 # use 15000 if you did not rescale above
happy.100k <- b0 + b1*x + b2*x*x
x <- 110 # use 25000 if you did not rescale above
happy.110k <- b0 + b1*x + b2*x*x
happy.110k - happy.100k # marginal effect of $10k increase at $75k starting salary
## [1] -454.7645
For this part of the final assignment you will be using a dataset that examines compensation of nonprofit executive directors from the years 2012-2013. The data is extracted from the IRS E-Filer database available on AWS.
install.packages("dplyr")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
URL <- "https://github.com/DS4PS/cpp-523-fall-2019/blob/master/labs/data/np-comp-data.rds?raw=true"
dat <- readRDS(gzcon(url( URL )))
set.seed( 1234 )
d2 <- sample_n( dat, 2000 )
The full codebook is below for reference. We will use these two variables for the model that predicts nonprofit executive salaries based upon the size of the nonprofit.
plot( log(d2$REVENUE), log(d2$SALARY), bty="n", pch=19, col="darkorange",
xlab="Nonprofit Revenue (logged)", ylab="Executive Director Salary (logged)",
xlim=c(5,25), ylim=c(5,16))
abline( h=seq( 1, 20, 0.5 ), col=gray(0.5,0.2), lwd=1 )
abline( v=seq( 1, 25, 0.5 ), col=gray(0.5,0.2), lwd=1 )
abline( lm( log(d2$SALARY) ~ log(d2$REVENUE) ), col=gray(0.5,0.5), lwd=3 )
plot( log(d2$REVENUE), log(d2$SALARY), bty="n", pch=19, col=gray(0.5,0.2), cex=1.2,
xlab="Nonprofit Revenue (logged)", ylab="Executive Director Salary (logged)",
xlim=c(5,25), ylim=c(5,16))
abline( lm( log(d2$SALARY) ~ log(d2$REVENUE) ), col="darkorange", lwd=3 )
points( mean(log(d2$REVENUE)), mean(log(d2$SALARY)), pch=19, col="darkorange", cex=2 )
points( c(8,8), c(12,6),
cex=3, col="steelblue", lwd=2 )
points( c(8,8), c(12,6),
cex=1.5, col="steelblue", pch=19 )
text( c(8,8), c(12,6), c("A","B"),
pos=4, offset=1.2, col="steelblue", cex=2 )

The image in your template may differ because it’s based on a random sample of 2,000 points to reduce data density so the visualization is not cluttered. Use this image to answer the question.
Will it make the slope larger or smaller? The slope will become smaller.
Would it contribute to a Type I or Type II error? Type 2 Error.
Will it make the slope larger or smaller? The slope will become larger.
Would it contribute to a Type I or Type II error? Type 1 Error.
exp() function.Note there is a decimal after the 14:
14.44879
exp(14.44879)
## [1] 1883778
The average revenue of a nonprofit is $1,883,778.
\(log(Salary) = 6.367 + 0.343 \cdot log(Revenue)\)
\(log(Salary) = 6.367 + 0.343 \cdot log(14.44879)\) = log(6.7648221973)
exp(6.7648221973)
## [1] 866.8121
The salary for a director of an average nonprofit would be $866.812
install.packages("stargazer")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
m <- lm( log(SALARY) ~ log(REVENUE), data=d2 )
stargazer( m, type="html",
omit.stat = c("rsq","f","ser"),
notes.label = "Standard errors in parentheses" )
| Dependent variable: | |
| log(SALARY) | |
| log(REVENUE) | 0.354*** |
| (0.009) | |
| Constant | 6.193*** |
| (0.128) | |
| Observations | 2,000 |
| Adjusted R2 | 0.445 |
| Standard errors in parentheses | p<0.1; p<0.05; p<0.01 |
A 1% change in x, log(Revenue), will result in a 0.354% change in y, log(Salary).