# Simple Linear Regression - Q1
# Goal: predict weight gained (Y / output) from calories consumed (X / input).

# Import the data set. The path is machine-specific; adjust it here if the
# CSV lives somewhere else.
Q1 <- read.csv("D:\\DataScience\\Assignments\\SimpleLinearRegression\\calories_consumed.csv")

# NOTE(review): attach() is kept only so that code relying on the bare names
# `Weight` / `Calories` keeps working. It copies the columns onto the search
# path, where same-named objects in the workspace can mask them. Prefer
# `Q1$column`, `with(Q1, ...)` or a `data = Q1` argument in new code.
attach(Q1)

# View() opens a spreadsheet-style viewer, so only call it when a user is
# actually present (it is of no use in Rscript / knit runs and may fail there).
if (interactive()) {
  View(Q1)
}

# Column names in the data set Q1
colnames(Q1)
## [1] "Weight" "Calories"
# First moment business decision: central tendency and range
# (mean, median, min/max). Columns are referenced through Q1 explicitly so
# the result cannot be affected by a same-named object on the search path.
summary(Q1$Calories)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1400 1728 2250 2341 2775 3900
# The mean (2341) is above the median (2250), so some positive (right)
# skewness can be expected.

# Second moment business decision: spread (variance, standard deviation)
var(Q1$Calories)
## [1] 565668.7
sd(Q1$Calories)
## [1] 752.1095

# Third and fourth moment business decisions: skewness and kurtosis
library(e1071) # provides skewness() and kurtosis()
skewness(Q1$Calories)
## [1] 0.5212708
kurtosis(Q1$Calories)
## [1] -0.9277095
# Bar plot of the raw Calories values, one bar per observation.
barplot(Q1$Calories)

# with() keeps the default title "Histogram of Calories" without depending
# on attach().
with(Q1, hist(Calories))

# The histogram was read as confirming a mild positive skew.
# TRUE is spelled out: `T` is an ordinary variable that can be reassigned.
boxplot(Q1$Calories, horizontal = TRUE)

# The box plot was read as showing no outliers, but some positive skew.
qqnorm(Q1$Calories)
qqline(Q1$Calories)

# A normal Q-Q plot checks normality, not linearity: the points were judged
# to follow the reference line closely enough to treat Calories as
# approximately normally distributed.
# Correlation coefficient between weight gained and calories consumed.
# Rule of thumb used here: r > 0.85 counts as a strong correlation.
cor(Q1$Weight, Q1$Calories)
## [1] 0.946991
# r = 0.947 indicates a strong positive linear association. It is not a
# model accuracy: the share of variance a linear fit can explain is
# r^2 (about 0.90).
plot(Q1)

# A scatter plot shows the relationship between two variables.
# Based on this scatter plot, a strong positive correlation between the two
# model variables can be expected.
# Fit the simple linear regression Weight = B0 + B1 * Calories.
# `data = Q1` makes lm() take both variables from the data frame instead of
# whatever happens to be visible on the search path.
Model1 <- lm(Weight ~ Calories, data = Q1)
summary(Model1)
##
## Call:
## lm(formula = Weight ~ Calories, data = Q1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -158.67 -107.56 36.70 81.68 165.53
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -625.75236 100.82293 -6.206 4.54e-05 ***
## Calories 0.42016 0.04115 10.211 2.86e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 111.6 on 12 degrees of freedom
## Multiple R-squared: 0.8968, Adjusted R-squared: 0.8882
## F-statistic: 104.3 on 1 and 12 DF, p-value: 2.856e-07
# Both the intercept (B0) and the Calories slope (B1) are statistically
# significant (p-values 4.54e-05 and 2.86e-07, far below 0.05), so both
# coefficients can be used for prediction on this data set.
# R^2 is 0.8968 (about 0.89 adjusted), i.e. roughly 90% of the variance in
# Weight is explained; with R^2 > 0.8 this is considered a strong model.