true

Q1. Describe the relationship between the two variables - education level (bachelors) and income (per_capita_income).

# Load the package
library(openintro)
library(ggplot2)
str(countyComplete)
## 'data.frame':    3143 obs. of  53 variables:
##  $ state                                    : Factor w/ 51 levels "Alabama","Alaska",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ name                                     : Factor w/ 1877 levels "Abbeville County",..: 83 90 101 151 166 227 237 250 298 320 ...
##  $ FIPS                                     : num  1001 1003 1005 1007 1009 ...
##  $ pop2010                                  : num  54571 182265 27457 22915 57322 ...
##  $ pop2000                                  : num  43671 140415 29038 20826 51024 ...
##  $ age_under_5                              : num  6.6 6.1 6.2 6 6.3 6.8 6.5 6.1 5.7 5.3 ...
##  $ age_under_18                             : num  26.8 23 21.9 22.7 24.6 22.3 24.1 22.9 22.5 21.4 ...
##  $ age_over_65                              : num  12 16.8 14.2 12.7 14.7 13.5 16.7 14.3 16.7 17.9 ...
##  $ female                                   : num  51.3 51.1 46.9 46.3 50.5 45.8 53 51.8 52.2 50.4 ...
##  $ white                                    : num  78.5 85.7 48 75.8 92.6 23 54.4 74.9 58.8 92.7 ...
##  $ black                                    : num  17.7 9.4 46.9 22 1.3 70.2 43.4 20.6 38.7 4.6 ...
##  $ native                                   : num  0.4 0.7 0.4 0.3 0.5 0.2 0.3 0.5 0.2 0.5 ...
##  $ asian                                    : num  0.9 0.7 0.4 0.1 0.2 0.2 0.8 0.7 0.5 0.2 ...
##  $ pac_isl                                  : num  NA NA NA NA NA NA 0 0.1 0 0 ...
##  $ two_plus_races                           : num  1.6 1.5 0.9 0.9 1.2 0.8 0.8 1.7 1.1 1.5 ...
##  $ hispanic                                 : num  2.4 4.4 5.1 1.8 8.1 7.1 0.9 3.3 1.6 1.2 ...
##  $ white_not_hispanic                       : num  77.2 83.5 46.8 75 88.9 21.9 54.1 73.6 58.1 92.1 ...
##  $ no_move_in_one_plus_year                 : num  86.3 83 83 90.5 87.2 88.5 92.8 82.9 86.2 88.1 ...
##  $ foreign_born                             : num  2 3.6 2.8 0.7 4.7 1.1 1.1 2.5 0.9 0.5 ...
##  $ foreign_spoken_at_home                   : num  3.7 5.5 4.7 1.5 7.2 3.8 1.6 4.5 1.6 1.4 ...
##  $ hs_grad                                  : num  85.3 87.6 71.9 74.5 74.7 74.7 74.8 78.5 71.8 73.4 ...
##  $ bachelors                                : num  21.7 26.8 13.5 10 12.5 12 11 16.1 10.8 10.5 ...
##  $ veterans                                 : num  5817 20396 2327 1883 4072 ...
##  $ mean_work_travel                         : num  25.1 25.8 23.8 28.3 33.2 28.1 25.1 22.1 23.6 26.2 ...
##  $ housing_units                            : num  22135 104061 11829 8981 23887 ...
##  $ home_ownership                           : num  77.5 76.7 68 82.9 82 76.9 69 70.7 71.4 77.5 ...
##  $ housing_multi_unit                       : num  7.2 22.6 11.1 6.6 3.7 9.9 13.7 14.3 8.7 4.3 ...
##  $ median_val_owner_occupied                : num  133900 177200 88200 81200 113700 ...
##  $ households                               : num  19718 69476 9795 7441 20605 ...
##  $ persons_per_household                    : num  2.7 2.5 2.52 3.02 2.73 2.85 2.58 2.46 2.51 2.22 ...
##  $ per_capita_income                        : num  24568 26469 15875 19918 21070 ...
##  $ median_household_income                  : num  53255 50147 33219 41770 45549 ...
##  $ poverty                                  : num  10.6 12.2 25 12.6 13.4 25.3 25 19.5 20.3 17.6 ...
##  $ private_nonfarm_establishments           : num  877 4812 522 318 749 ...
##  $ private_nonfarm_employment               : num  10628 52233 7990 2927 6968 ...
##  $ percent_change_private_nonfarm_employment: num  16.6 17.4 -27 -14 -11.4 -18.5 2.1 -5.6 -45.8 5.4 ...
##  $ nonemployment_establishments             : num  2971 14175 1527 1192 3501 ...
##  $ firms                                    : num  4067 19035 1667 1385 4458 ...
##  $ black_owned_firms                        : num  15.2 2.7 NA 14.9 NA NA NA 7.2 NA NA ...
##  $ native_owned_firms                       : num  NA 0.4 NA NA NA NA NA NA NA NA ...
##  $ asian_owned_firms                        : num  1.3 1 NA NA NA NA 3.3 1.6 NA NA ...
##  $ pac_isl_owned_firms                      : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ hispanic_owned_firms                     : num  0.7 1.3 NA NA NA NA NA 0.5 NA NA ...
##  $ women_owned_firms                        : num  31.7 27.3 27 NA 23.2 38.8 NA 24.7 29.3 14.5 ...
##  $ manufacturer_shipments_2007              : num  NA 1410273 NA 0 341544 ...
##  $ mercent_whole_sales_2007                 : num  NA NA NA NA NA ...
##  $ sales                                    : num  598175 2966489 188337 124707 319700 ...
##  $ sales_per_capita                         : num  12003 17166 6334 5804 5622 ...
##  $ accommodation_food_service               : num  88157 436955 NA 10757 20941 ...
##  $ building_permits                         : num  191 696 10 8 18 1 3 107 10 6 ...
##  $ fed_spending                             : num  331142 1119082 240308 163201 294114 ...
##  $ area                                     : num  594 1590 885 623 645 ...
##  $ density                                  : num  91.8 114.6 31 36.8 88.9 ...

ggplot(data = countyComplete, aes(x = bachelors ,y = per_capita_income )) + #countyComplete dataset is from openintro rpackage
  geom_point()


cor(countyComplete$per_capita_income, countyComplete$bachelors, use = "pairwise.complete.obs")
## [1] 0.7924464

Q2. See model 1. Is the coefficient of bachelors statistically significant at 5%? Interpret the coefficient.

Yes, the coefficent of bachelors is sgnificant. The coefficient of 0.013264 suggests that the price increases by $13.26 per pound.

Q3. See model 1. How much a typical person is predicted to make a year in a county that has 70% of its population with a bachelor’s degree?

Q4. See model 1. What is the reported residual standard error? What does it mean?

The residual standard error of 7.575 means that the model misses the actual values by $7,575.

Q5. See model 1. What is the reported adjusted R squared? What does it mean?

The model explains that 56.7% of the variations in the price and 0.5666 is the adjusted R sqaured.

Run a second regression model for income (per_capita_income) with two explanatory variables: education (bachelors) and density (density), and answer Q6.

Q6. Compare model 1 and model 2. Which of the two models better fits the data? Discuss your answer by comparing the residual standard error and the adjusted R squared between the two models.

Model 2 is a better fit for the data than model 1. Model 2 explains the variations in the price to a greater extent: its adjusted R squared of 0.5975 is higher than 0.5666 of Model 1. Model 2 is more accurate in predicting prices: its residual standard error of 7.3 is lower than 7.575 of Model 1.

# Create a linear model 1
mod_1 <- lm(per_capita_income ~ bachelors, data = countyComplete)

# View summary of model 1
summary(mod_1)
## 
## Call:
## lm(formula = per_capita_income ~ bachelors, data = countyComplete)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -18032.7  -1708.2     73.8   1748.0  21756.5 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 13087.680    142.091   92.11   <2e-16 ***
## bachelors     494.753      6.795   72.81   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3299 on 3141 degrees of freedom
## Multiple R-squared:  0.628,  Adjusted R-squared:  0.6279 
## F-statistic:  5302 on 1 and 3141 DF,  p-value: < 2.2e-16

# Create a linear model 2
mod_2 <- lm(per_capita_income ~ bachelors + density, data = countyComplete)

# View summary of model 2
summary(mod_2)
## 
## Call:
## lm(formula = per_capita_income ~ bachelors + density, data = countyComplete)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -18257.9  -1707.7     71.5   1749.6  22101.1 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.319e+04  1.433e+02  92.042  < 2e-16 ***
## bachelors   4.872e+02  6.963e+00  69.973  < 2e-16 ***
## density     1.623e-01  3.499e-02   4.639 3.65e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3289 on 3140 degrees of freedom
## Multiple R-squared:  0.6305, Adjusted R-squared:  0.6303 
## F-statistic:  2679 on 2 and 3140 DF,  p-value: < 2.2e-16