# Simple Linear Regression-Q1
## Predict weight gained (Y / output) from calories consumed (X / input)


# Import the calories-consumed data set into the data frame Q1.
Q1 <- read.csv(
  "D:\\DataScience\\Assignments\\SimpleLinearRegression\\calories_consumed.csv"
)

# attach() puts the columns of Q1 on the search path so they can be referenced
# by bare name. Prefer explicit Q1$<column> access in new code: attached names
# are silently masked by same-named variables in the workspace.
attach(Q1)

# View() opens a data viewer window, which is only meaningful in an
# interactive session; skip it when the script runs in batch mode.
if (interactive()) {
  View(Q1)
}

# Column names in the data set Q1
colnames(Q1)
## [1] "Weight"   "Calories"
# First moment business decision (mean, median, range)

# Calories is read through Q1$ so the result does not depend on attach() or on
# a workspace variable that happens to be called Calories.
summary(Q1$Calories)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1400    1728    2250    2341    2775    3900
# The mean (2341) is above the median (2250), so we can expect some positive
# (right) skew here.
# Second moment business decision (variance, standard deviation)

var(Q1$Calories)
## [1] 565668.7
sd(Q1$Calories)
## [1] 752.1095
# Third and fourth moment business decisions (skewness and kurtosis)

library(e1071)

# Shape of the Calories distribution. The column is read through Q1$ so the
# results do not depend on attach() or on workspace variables masking it.
skewness(Q1$Calories)
## [1] 0.5212708
kurtosis(Q1$Calories)
## [1] -0.9277095
# The positive skewness value agrees with the mild right skew suggested by the
# summary statistics.
barplot(Q1$Calories)

# Title and axis label are set explicitly so the figure is labelled "Calories"
# rather than "Q1$Calories".
hist(Q1$Calories, main = "Histogram of Calories", xlab = "Calories")

# The histogram confirms that there is some positive skew.

# TRUE is spelled out: T is an ordinary variable that can be reassigned.
boxplot(Q1$Calories, horizontal = TRUE)

# The box plot shows no outliers, but it does show some positive skew.


qqnorm(Q1$Calories)
qqline(Q1$Calories)

# In the normal Q-Q plot the points follow the reference line, so Calories is
# treated as approximately normally distributed.
# Correlation coefficient between Weight and Calories
# (r > 0.85 is treated as a strong correlation).

cor(Q1$Weight, Q1$Calories)
## [1] 0.946991
# r = 0.947 is a strong positive linear correlation. r is not an accuracy
# figure: r^2 = 0.8968, so a simple linear model on Calories explains about
# 90% of the variance in Weight.

plot(Q1)

# A scatter plot shows the relationship between two variables.
# Based on this scatter plot we can expect a strong positive correlation
# between the model variables.
# Fit the simple linear regression Weight = B0 + B1 * Calories.
# data = Q1 makes lm() take both variables from the data frame itself instead
# of from whatever is attached or defined in the workspace.
Model1 <- lm(Weight ~ Calories, data = Q1)


summary(Model1)
## 
## Call:
## lm(formula = Weight ~ Calories, data = Q1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -158.67 -107.56   36.70   81.68  165.53 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -625.75236  100.82293  -6.206 4.54e-05 ***
## Calories       0.42016    0.04115  10.211 2.86e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 111.6 on 12 degrees of freedom
## Multiple R-squared:  0.8968, Adjusted R-squared:  0.8882 
## F-statistic: 104.3 on 1 and 12 DF,  p-value: 2.856e-07
# Based on the summary, both the intercept (B0) and the Calories slope (B1) are statistically significant (p-values 4.54e-05 and 2.86e-07, both < 0.05), so both coefficients can be used in the model.

# R^2 is 0.8968 (adjusted R^2 0.8882); since R^2 > 0.8, we consider this a strong model.