# Simple linear regression Q1

# Load the calories-consumed / weight-gained data set into `a`.
# NOTE(review): the path is absolute and machine-specific; adjust it before
# running the script on another machine.
a <- read.csv(
  "C:\\Users\\Harisha\\Desktop\\Datascience Assignments\\Simple linear regression\\calories_consumed 1.csv"
)

# attach() puts the columns of `a` on the search path so that unqualified
# column names (e.g. Calories.Consumed) resolve. It is kept for code that
# relies on those names; new code should prefer a$column or a `data =`
# argument, because attached names can be masked by global variables.
attach(a)

# View() opens a GUI data viewer, so only call it in an interactive session.
# This lets the script also run non-interactively (e.g. via Rscript).
if (interactive()) {
  View(a)
}
# First moment business decision (mean, median, range).
# Columns are read explicitly from `a` so the results cannot be affected by a
# same-named object masking an attached column.

summary(a$Calories.Consumed)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1400    1728    2250    2341    2775    3900
# The mean (2341) is above the median (2250), so we can expect some positive
# skewness.

# Second moment business decision (variance, standard deviation).
var(a$Calories.Consumed)
## [1] 565668.7
sd(a$Calories.Consumed)
## [1] 752.1095

# Third and fourth moment business decisions (skewness and kurtosis).
library(e1071)
skewness(a$Calories.Consumed)
## [1] 0.5212708
kurtosis(a$Calories.Consumed)
## [1] -0.9277095
# Graphical checks of the Calories.Consumed distribution.
# with(a, ...) evaluates each call against the data frame directly while
# keeping the bare column name in the call, so automatically generated plot
# titles and axis labels still read "Calories.Consumed".
with(a, barplot(Calories.Consumed))

with(a, hist(Calories.Consumed))

# The histogram shows positive (right) skewness.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
with(a, boxplot(Calories.Consumed, horizontal = TRUE))

# The boxplot shows positive skewness and no outliers.
with(a, qqnorm(Calories.Consumed))
with(a, qqline(Calories.Consumed))

# Based on the Q-Q plot, the points follow the reference line reasonably
# closely, i.e. the data are approximately normally distributed.
# Strength of the linear association between weight gained and calories.
cor(a$Weight.gained..grams., a$Calories.Consumed)
## [1] 0.946991
# The correlation is strong: it is greater than 0.85.

# Scatter plot of the data frame's columns against each other.
plot(a)

# This scatter plot shows a positive relationship between the two variables.

# Simple linear model: weight gained (grams) explained by calories consumed.
# `data = a` ties the model to the data frame instead of relying on attached
# columns being found on the search path.
SLM <- lm(Weight.gained..grams. ~ Calories.Consumed, data = a)
summary(SLM)
## 
## Call:
## lm(formula = Weight.gained..grams. ~ Calories.Consumed, data = a)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -158.67 -107.56   36.70   81.68  165.53 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -625.75236  100.82293  -6.206 4.54e-05 ***
## Calories.Consumed    0.42016    0.04115  10.211 2.86e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 111.6 on 12 degrees of freedom
## Multiple R-squared:  0.8968, Adjusted R-squared:  0.8882 
## F-statistic: 104.3 on 1 and 12 DF,  p-value: 2.856e-07
# Based on the summary, both B0 (intercept) and B1 (slope) are statistically
# significant (p < 0.001), so we can use these two values in our model.

# The R-squared value is 0.8968 (greater than 0.80), so it is a strong model.