# Simple linear regression Q1
# Load packages and data ----
library(e1071) # provides skewness() and kurtosis()

a <- read.csv(
  "C:\\Users\\Harisha\\Desktop\\Datascience Assignments\\Simple linear regression\\calories_consumed 1.csv"
)
# NOTE(review): attach() is kept only in case code outside this section
# refers to the columns by bare name. Everything below reads the columns
# from `a` explicitly; drop this call once nothing else depends on it.
attach(a)
# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(a)
}

# Explicit column vectors, so the analysis does not depend on attach().
calories <- a[["Calories.Consumed"]]
weight_gain <- a[["Weight.gained..grams."]]

# First-moment business decision (mean, median, range) ----
summary(calories)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1400 1728 2250 2341 2775 3900
# The mean (2341) is above the median (2250), so we can expect some
# positive skewness.

# Second-moment business decision (variance, standard deviation) ----
var(calories)
## [1] 565668.7
sd(calories)
## [1] 752.1095

# Third- and fourth-moment business decisions (skewness, kurtosis) ----
skewness(calories)
## [1] 0.5212708
kurtosis(calories)
## [1] -0.9277095

# Graphical checks ----
barplot(calories)

# main/xlab are spelled out so the plot keeps its original labels.
hist(
  calories,
  main = "Histogram of Calories.Consumed",
  xlab = "Calories.Consumed"
)

# The histogram shows positive skewness.
boxplot(calories, horizontal = TRUE)

# The box plot shows positive skewness and no outliers.
qqnorm(calories)
qqline(calories)

# Based on the Q-Q plot, the points follow the reference line, so the data
# are approximately normally distributed.

# Relationship between the two variables ----
cor(weight_gain, calories)
## [1] 0.946991
# The correlation is strong: it is greater than 0.85.
plot(a)

# This scatter plot shows a positive relationship between the two variables.

# Simple linear model: weight gained as a function of calories consumed ----
# data = a makes the model look the columns up in `a` rather than on the
# search path.
SLM <- lm(Weight.gained..grams. ~ Calories.Consumed, data = a)
summary(SLM)
##
## Call:
## lm(formula = Weight.gained..grams. ~ Calories.Consumed, data = a)
##
## Residuals:
## Min 1Q Median 3Q Max
## -158.67 -107.56 36.70 81.68 165.53
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -625.75236 100.82293 -6.206 4.54e-05 ***
## Calories.Consumed 0.42016 0.04115 10.211 2.86e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 111.6 on 12 degrees of freedom
## Multiple R-squared: 0.8968, Adjusted R-squared: 0.8882
## F-statistic: 104.3 on 1 and 12 DF, p-value: 2.856e-07
# Both coefficients, the intercept (B0) and the slope (B1), are significant
# (p < 0.001), so we can use these two values in our model.
# Multiple R-squared is 0.8968 (greater than 0.80), so it is a strong model.