# Load necessary libraries
library(ggplot2)
# Load data
# stringsAsFactors = FALSE keeps text columns as character on every R version,
# so as.numeric() below parses the values instead of returning factor codes.
df <- read.csv("food_coded.csv", stringsAsFactors = FALSE)
# Keep at most the first 100 rows. Unlike df[1:100, ], head() does not pad the
# result with all-NA rows when the file has fewer than 100 rows.
df_subset <- head(df, 100)
# Clean up GPA and calories_day: coerce both to numeric. Entries that cannot be
# parsed as a number become NA, which is what raises the warning shown below.
df_subset$GPA <- as.numeric(df_subset$GPA)
## Warning: NAs introduced by coercion
df_subset$calories_day <- as.numeric(df_subset$calories_day)
# Keep only the two analysis columns and drop rows missing either value.
df_clean <- na.omit(df_subset[, c("GPA", "calories_day")])
# Scatter plot of calories_day against GPA, overlaid with a straight-line
# least-squares fit (confidence band switched off).
ggplot(data = df_clean, mapping = aes(x = GPA, y = calories_day)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, colour = "blue") +
  labs(
    x = "GPA",
    y = "Calories per Day",
    title = "GPA vs Daily Calorie Intake"
  )
## `geom_smooth()` using formula = 'y ~ x'

# Fit a simple linear regression of calories_day on GPA using the cleaned
# rows, then print the coefficient table and fit statistics.
model <- lm(formula = calories_day ~ GPA, data = df_clean)
summary(model)
##
## Call:
## lm(formula = calories_day ~ GPA, data = df_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.02948 -0.02579 -0.02301 -0.02160 0.97839
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.039656 0.594886 5.110 2.07e-06 ***
## GPA -0.004624 0.172431 -0.027 0.979
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6242 on 82 degrees of freedom
## Multiple R-squared: 8.768e-06, Adjusted R-squared: -0.01219
## F-statistic: 0.000719 on 1 and 82 DF, p-value: 0.9787