library(readxl)
# Read the MLB beer price workbook. Column types are pinned explicitly, in
# column order: Year (numeric), Team / Nickname / City (text), and
# Price / Size / PriceperOunce (numeric).
MLB_Beer_Prices <- read_excel(
  "C:/Users/Sameekhsya Behura/Desktop/MLB Beer Prices.xlsx",
  col_types = c(
    "numeric", "text", "text", "text", "numeric", "numeric", "numeric"
  )
)

# View() opens a spreadsheet-style viewer window, which only makes sense in an
# interactive session; skip it when the script is run non-interactively.
if (interactive()) {
  View(MLB_Beer_Prices)
}

# Refer to the data under a shorter name and print its structure.
mydata <- MLB_Beer_Prices
str(mydata)
## Classes 'tbl_df', 'tbl' and 'data.frame':    155 obs. of  7 variables:
##  $ Year         : num  2013 2013 2013 2013 2013 ...
##  $ Team         : chr  "Arizona Diamondbacks" "Atlanta Braves" "Baltimore Orioles" "Boston Red Sox" ...
##  $ Nickname     : chr  "Diamondbacks" "Braves" "Orioles" "Red Sox" ...
##  $ City         : chr  "Arizona" "Atlanta" "Baltimore" "Boston" ...
##  $ Price        : num  4 7.25 6.75 7.25 7.25 6.5 5.5 4 6 5 ...
##  $ Size         : num  14 16 18 12 16 16 12 12 16 12 ...
##  $ PriceperOunce: num  0.286 0.453 0.375 0.604 0.453 ...
library(GGally)
## Warning: package 'GGally' was built under R version 3.5.1
## Loading required package: ggplot2
# Pairs-plot matrix of columns 5 to 7 of mydata (Price, Size, PriceperOunce).
ggpairs(
  data = mydata,
  columns = 5:7,
  title = "Visualization"
)

There is a positive linear relationship between Price and Price per Ounce. Size and Price per Ounce are negatively correlated, but that relationship is not very strong.

# Scatter plots with a smooth curve for five (x, y) column pairs, drawn in the
# order listed below. Axis labels are passed explicitly as "mydata$<column>",
# which is the label scatter.smooth() derives when called with
# `mydata$<column>` arguments directly.
local({
  axis_pairs <- list(
    c("Size", "Price"),
    c("Year", "Size"),
    c("Size", "PriceperOunce"),
    c("Year", "PriceperOunce"),
    c("Year", "Price")
  )
  for (axes in axis_pairs) {
    scatter.smooth(
      mydata[[axes[1]]], mydata[[axes[2]]],
      xlab = paste0("mydata$", axes[1]),
      ylab = paste0("mydata$", axes[2])
    )
  }
})

The scatter plots show some behaviours of the variables. We can observe a negative relationship between Size and Price per Ounce.

Null hypothesis: there is no relationship between Price per Ounce and Price. Alternative hypothesis: there is some relationship between Price per Ounce and Price.

# Simple linear regression of price per ounce on price, followed by its
# coefficient table and fit statistics.
fit_1 <- lm(formula = PriceperOunce ~ Price, data = mydata)
summary(fit_1)
## 
## Call:
## lm(formula = PriceperOunce ~ Price, data = mydata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.11557 -0.04051 -0.01175  0.04064  0.17218 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.178576   0.025070   7.123 3.87e-11 ***
## Price       0.039489   0.004074   9.694  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06267 on 153 degrees of freedom
## Multiple R-squared:  0.3805, Adjusted R-squared:  0.3764 
## F-statistic: 93.97 on 1 and 153 DF,  p-value: < 2.2e-16
# Scatter of PriceperOunce against Price with a fitted "lm" smooth overlaid,
# drawn on a white panel with both axis lines shown.
ggplot(mydata, aes(Price, PriceperOunce)) +
  geom_point() +
  stat_smooth(method = "lm", colour = "dodgerblue3") +
  labs(title = "Linear Model Fitted to Data") +
  theme(
    panel.background = element_rect(fill = "white"),
    axis.line.x = element_line(),
    axis.line.y = element_line()
  )

# Price per ounce predicted by fit_1 at Price = 4.
predict(fit_1, newdata = data.frame(Price = 4))
##        1 
## 0.336533
# Multiple linear regression of price per ounce on both price and size,
# followed by its coefficient table and fit statistics.
fit_2 <- lm(formula = PriceperOunce ~ Price + Size, data = mydata)
summary(fit_2)
## 
## Call:
## lm(formula = PriceperOunce ~ Price + Size, data = mydata)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.057384 -0.004173 -0.001961  0.006421  0.036784 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.4075035  0.0058948   69.13   <2e-16 ***
## Price        0.0721572  0.0009174   78.65   <2e-16 ***
## Size        -0.0291385  0.0004508  -64.64   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01178 on 152 degrees of freedom
## Multiple R-squared:  0.9783, Adjusted R-squared:  0.978 
## F-statistic:  3419 on 2 and 152 DF,  p-value: < 2.2e-16

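fit_2 is a better-fitting model than fit_1: the adjusted R-squared rises from 0.3764 to 0.978 and the residual standard error falls from 0.06267 to 0.01178. Note that, in the rows shown, Price per Ounce equals Price divided by Size, so a close fit is expected once both predictors are included.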

We make a grid of values for our predictor variables:

# Grid of predictor values used to draw the fitted plane of fit_2.
# The grid spans the observed Price and Size ranges (rounded outwards, in steps
# of 0.5) rather than fixed ranges: a grid far outside the data makes every
# prediction an extrapolation and pushes the observed points, which are drawn
# later on the same 3D axes, outside the plotted region.
Price <- seq(
  floor(min(mydata$Price, na.rm = TRUE)),
  ceiling(max(mydata$Price, na.rm = TRUE)),
  by = 0.5
)
Size <- seq(
  floor(min(mydata$Size, na.rm = TRUE)),
  ceiling(max(mydata$Size, na.rm = TRUE)),
  by = 0.5
)
# One row per (Price, Size) combination.
pred_grid <- expand.grid(Price = Price, Size = Size)

Next, we make predictions for Price per ounce based on the predictor variable grid:

# Predict price per ounce from fit_2 at every grid point. `newdata` is written
# out in full; the abbreviated `new` only works through partial argument
# matching.
pred_grid$PriceperOunce2 <- predict(fit_2, newdata = pred_grid)

library(scatterplot3d)

# Plot the grid predictions (blue, pch = 1), then overlay the observed data
# (pch = 16) using the points3d() function returned by scatterplot3d().
fit_2_sp <- scatterplot3d(
  pred_grid$Price, pred_grid$Size, pred_grid$PriceperOunce2,
  angle = 60, color = "blue", pch = 1,
  ylab = "Size", xlab = "Price", zlab = "Price Per Ounce"
)
fit_2_sp$points3d(mydata$Price, mydata$Size, mydata$PriceperOunce, pch = 16)

# Price per ounce predicted by fit_2 at Price = 4 and Size = 12.
predict(fit_2, newdata = data.frame(Price = 4, Size = 12))
##         1 
## 0.3464704