library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
# Preview the first 20 listing prices
head(merged_data[["price"]], n = 20)
## [1] 125 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100
## [20] 100
# Preview the first 20 overall review scores
head(merged_data[["review_scores_rating"]], n = 20)
## [1] 100 94 94 94 94 94 94 94 94 94 94 94 94 94 94 94 94 94 94
## [20] 94
I’m analyzing the relationship between ‘price’ (the independent variable, X) and ‘review_scores_rating’ (the dependent variable, Y), where ‘price’ is the listing price in each country’s currency and ‘review_scores_rating’ is the listing’s overall rating out of 100.
# Fit a simple OLS regression of review score (Y) on listing price (X)
model <- lm(
  formula = review_scores_rating ~ price,
  data = merged_data
)
# Print the coefficient table, residual quantiles, and fit statistics
summary(model)
##
## Call:
## lm(formula = review_scores_rating ~ price, data = merged_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -76.179 -1.604 1.397 3.412 5.440
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.456e+01 2.012e-03 46987.32 <2e-16 ***
## price 5.503e-05 8.196e-07 67.14 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.601 on 5367036 degrees of freedom
## (6105 observations deleted due to missingness)
## Multiple R-squared: 0.0008392, Adjusted R-squared: 0.000839
## F-statistic: 4508 on 1 and 5367036 DF, p-value: < 2.2e-16
# Keep only the two regression variables and drop every row that has an NA in
# either of them. NOTE: this overwrites `merged_data`, so all other columns
# are discarded from this point on.
merged_data <- na.omit(merged_data[, c("price", "review_scores_rating")])

# Sample means of X (price) and Y (review score)
mean_x <- with(merged_data, mean(price))
mean_y <- with(merged_data, mean(review_scores_rating))

# Sample covariance of X and Y, and sample variance of X
cov_xy <- with(merged_data, cov(price, review_scores_rating))
var_x <- with(merged_data, var(price))

# Closed-form OLS estimates: slope = cov(X, Y) / var(X); the intercept follows
# from the fitted line passing through the point (mean_x, mean_y)
slope <- cov_xy / var_x
intercept <- mean_y - slope * mean_x

cat("Estimated Intercept:", intercept, "\n")
## Estimated Intercept: 94.55945
cat("Estimated Slope:", slope, "\n")
## Estimated Slope: 5.502587e-05