# Linear regression: predicting the price of wine
# Load the training data. "wine.csv" is a relative path, so it must be
# present in the current working directory.
wine <- read.csv("wine.csv")

# Inspect the column types and the per-column summary statistics.
str(wine)
summary(wine)

# Correlation matrix across all columns: full precision first, then rounded
# to two decimals for readability.
cor(wine)
round(cor(wine), digits = 2)

# Same rounded correlation matrix, with the columns selected in an explicit
# order that puts Price first.
wineReordered <- wine[c(
  "Price", "Year", "WinterRain", "AGST", "HarvestRain", "Age", "FrancePop"
)]
round(cor(wineReordered), digits = 2)

# Scatterplot matrix of every pair of columns.
pairs(wine)
## Correlogram example
# Install corrgram only when it is missing. An unconditional
# install.packages() call re-downloads the package on every run and fails
# with "trying to use CRAN without setting a mirror" when no repository is
# configured, so the mirror is passed explicitly.
if (!requireNamespace("corrgram", quietly = TRUE)) {
  install.packages("corrgram", repos = "https://cloud.r-project.org")
}
library(corrgram)

# Draw the correlogram of the reordered wine data (`wineReordered` is built
# earlier in this script): shaded cells below the diagonal, pie glyphs above
# it, and the variable names on the diagonal.
corrgram(
  wineReordered,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Predict price of wine"
)
# Linear regression (one variable): Price explained by AGST alone.
model1 <- lm(Price ~ AGST, data = wine)
summary(model1)

# Sum of squared errors: show the individual residuals, then total their
# squares and print the result.
model1$residuals
SSE <- sum(model1$residuals^2)
SSE
# Linear regression (two variables): Price explained by AGST and HarvestRain.
model2 <- lm(Price ~ AGST + HarvestRain, data = wine)
summary(model2)

# Sum of squared errors for model2 (overwrites the previous SSE value).
SSE <- sum(model2$residuals^2)
SSE
# Linear regression (all variables): Price explained by AGST, HarvestRain,
# WinterRain, Age and FrancePop.
model3 <- lm(
  Price ~ AGST + HarvestRain + WinterRain + Age + FrancePop,
  data = wine
)
summary(model3)

# Sum of squared errors for model3 (overwrites the previous SSE value).
SSE <- sum(model3$residuals^2)
SSE
######
# Refit without FrancePop: Price explained by AGST, HarvestRain, WinterRain
# and Age.
model4 <- lm(
  Price ~ AGST + HarvestRain + WinterRain + Age,
  data = wine
)
summary(model4)
# Correlations between selected pairs of columns, followed by the full
# correlation matrix of the training data.
with(wine, cor(WinterRain, Price))
with(wine, cor(Age, FrancePop))
cor(wine)
# Refit without Age and FrancePop: Price explained by AGST, HarvestRain and
# WinterRain only.
model5 <- lm(
  Price ~ AGST + HarvestRain + WinterRain,
  data = wine
)
summary(model5)
# Apply the model to the test set
# Load the test data. "wine_test.csv" is a relative path, so it must be
# present in the current working directory.
wineTest <- read.csv("wine_test.csv")
str(wineTest)

# Predict test-set prices with model4 and display the predictions.
predictTest <- predict(model4, newdata = wineTest)
predictTest

# Out-of-sample R-squared, 1 - SSE / SST:
#   SSE - squared error of the predictions on the test set
#   SST - squared error of a baseline that always predicts the mean Price
#         of the training data (`wine`), not of the test data
SSE <- sum((wineTest$Price - predictTest)^2)
SST <- sum((wineTest$Price - mean(wine$Price))^2)
1 - SSE / SST