R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Install the necessary packages if you haven't already
# install.packages("car") # for joint hypothesis testing

# Load libraries
library(car) # for joint significance testing
library(stats) # for OLS

# Simulate data 
set.seed(123)
n <- 100
data <- data.frame(
  psoda = rnorm(n, 2, 0.5),
  prpblck = runif(n, 0, 1),
  income = rnorm(n, 50000, 10000),
  prppov = runif(n, 0, 1),
  hseval = rnorm(n, 150000, 30000)
)

# Transform variables as required (log of psoda, income, and hseval)
data$log_psoda <- log(data$psoda)
data$log_income <- log(data$income)
data$log_hseval <- log(data$hseval)

# OLS regression without log(hseval)
model1 <- lm(log_psoda ~ prpblck + log_income + prppov, data = data)

# View the summary of the regression
summary(model1)
## 
## Call:
## lm(formula = log_psoda ~ prpblck + log_income + prppov, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7735 -0.1271  0.0020  0.1529  0.4312 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  2.10415    1.43294   1.468    0.145
## prpblck      0.03073    0.08183   0.376    0.708
## log_income  -0.12584    0.13284  -0.947    0.346
## prppov      -0.13527    0.08180  -1.654    0.101
## 
## Residual standard error: 0.2359 on 96 degrees of freedom
## Multiple R-squared:  0.04255,    Adjusted R-squared:  0.01263 
## F-statistic: 1.422 on 3 and 96 DF,  p-value: 0.2411
# Calculate correlation between log(income) and prppov
cor_log_income_prppov <- cor(data$log_income, data$prppov)
cor_log_income_prppov
## [1] 0.05813531
# Get the p-values for each variable in the model
summary(model1)$coefficients[, 4]
## (Intercept)     prpblck  log_income      prppov 
##   0.1452618   0.7080556   0.3458700   0.1014628
# OLS regression with log(hseval)
model2 <- lm(log_psoda ~ prpblck + log_income + prppov + log_hseval, data = data)

# View the summary of the regression
summary(model2)
## 
## Call:
## lm(formula = log_psoda ~ prpblck + log_income + prppov + log_hseval, 
##     data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.78410 -0.12513  0.00581  0.15580  0.42862 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  2.51598    1.98283   1.269    0.208
## prpblck      0.03279    0.08250   0.397    0.692
## log_income  -0.12804    0.13367  -0.958    0.341
## prppov      -0.13595    0.08222  -1.654    0.102
## log_hseval  -0.03269    0.10822  -0.302    0.763
## 
## Residual standard error: 0.237 on 95 degrees of freedom
## Multiple R-squared:  0.04347,    Adjusted R-squared:  0.003196 
## F-statistic: 1.079 on 4 and 95 DF,  p-value: 0.3712
# Extract the p-value for log(hseval)
summary(model2)$coefficients["log_hseval", 4]
## [1] 0.7632542
# Test whether log(income) and prppov are jointly significant in model2
linearHypothesis(model2, c("log_income = 0", "prppov = 0"))
## Linear hypothesis test
## 
## Hypothesis:
## log_income = 0
## prppov = 0
## 
## Model 1: restricted model
## Model 2: log_psoda ~ prpblck + log_income + prppov + log_hseval
## 
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1     97 5.5533                           
## 2     95 5.3363  2   0.21698 1.9314 0.1506
# Diagnostic plots for model2
par(mfrow = c(2, 2))  # 2x2 layout
plot(model2)

summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.