# Read the raw computer listings from the local training folder.
computer_data <- read.csv(
  "C:\\Users\\amits\\Desktop\\sconcept\\datascience training\\assignment\\datasetsandcodesandassignments\\Computer_Data.csv"
)
head(computer_data)
## X price speed hd ram screen cd multi premium ads trend
## 1 1 1499 25 80 4 14 no no yes 94 1
## 2 2 1795 33 85 2 14 no no yes 94 1
## 3 3 1595 25 170 4 15 no no yes 94 1
## 4 4 1849 25 170 8 14 no no no 94 1
## 5 5 3295 33 340 16 14 no no yes 94 1
## 6 6 3695 66 340 16 14 no no yes 94 1
# Preview only the hd (4th) and price (2nd) columns, in that order.
head(computer_data[, c("hd", "price")])
## hd price
## 1 80 1499
## 2 85 1795
## 3 170 1595
## 4 170 1849
## 5 340 3295
## 6 340 3695
# Keep that two-column subset as the working data for the regressions.
pricehd <- computer_data[, c("hd", "price")]
head(pricehd)
## hd price
## 1 80 1499
## 2 85 1795
## 3 170 1595
## 4 170 1849
## 5 340 3295
## 6 340 3695
# Put the subset on the search path so hd and price are reachable by bare name.
attach(pricehd)
View(pricehd)
# Five-number summary plus mean for both columns.
summary(pricehd)
## hd price
## Min. : 80.0 Min. : 949
## 1st Qu.: 214.0 1st Qu.:1794
## Median : 340.0 Median :2144
## Mean : 416.6 Mean :2220
## 3rd Qu.: 528.0 3rd Qu.:2595
## Max. :2100.0 Max. :5399
# Normal Q-Q plot of price, drawn in its own on-screen window.
# dev.new() opens the platform's default graphics device, so unlike windows()
# it is not limited to Windows builds of R. noRStudioGD = TRUE asks for a
# separate window instead of the RStudio plots pane.
dev.new(noRStudioGD = TRUE)
# Columns are taken from pricehd explicitly rather than from the search path.
qqnorm(pricehd$price)
qqline(pricehd$price)
## scatter plot
dev.new(noRStudioGD = TRUE)
plot(price ~ hd, data = pricehd)
# Pearson correlation between price and hd.
cor(pricehd$price, pricehd$hd)
## [1] 0.4302578
# Baseline simple linear regression of price on hd.
# The data frame is passed explicitly so the fit does not depend on which
# objects happen to be attached or defined in the global environment.
m1 <- lm(price ~ hd, data = pricehd)
summary(m1)
##
## Call:
## lm(formula = price ~ hd, data = pricehd)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1338.45 -382.23 -44.47 315.34 2674.65
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.817e+03 1.257e+01 144.6 <2e-16 ***
## hd 9.665e-01 2.564e-02 37.7 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 524.3 on 6257 degrees of freedom
## Multiple R-squared: 0.1851, Adjusted R-squared: 0.185
## F-statistic: 1421 on 1 and 6257 DF, p-value: < 2.2e-16
# Fitted prices (y-hat) from the baseline model m1, one per row of pricehd.
pv <- predict(m1, newdata = pricehd)
class(pv) # data type
## [1] "numeric"
# Wrap the numeric vector in a one-column data frame (column name: pv) ...
pv1 <- as.data.frame(pv)
# ... and bind it to the observed hd/price columns.
final <- cbind(pricehd, pv1)
View(final)
# Switch to the assignment folder and write the combined table there.
setwd("C:\\Users\\amits\\Desktop\\sconcept\\datascience training\\assignment")
write.csv(final, file = "Aug16_SLR_assignment.csv")
getwd()
## [1] "C:/Users/amits/Desktop/sconcept/datascience training/assignment"
# Read the exported file back. `test` is the same rows as `final` (not a
# held-out set), plus the row-name column X that write.csv() adds.
test <- read.csv(
  "C:\\Users\\amits\\Desktop\\sconcept\\datascience training\\assignment\\Aug16_SLR_assignment.csv"
)
View(test)
# Price regressed on log(hd). The data frame is passed explicitly so the fit
# does not depend on attached or global variables.
m2 <- lm(price ~ log(hd), data = pricehd)
summary(m2)
##
## Call:
## lm(formula = price ~ log(hd), data = pricehd)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1231.30 -366.59 -22.54 312.96 2644.09
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -301.88 61.92 -4.875 1.11e-06 ***
## log(hd) 431.14 10.53 40.951 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 515.8 on 6257 degrees of freedom
## Multiple R-squared: 0.2114, Adjusted R-squared: 0.2112
## F-statistic: 1677 on 1 and 6257 DF, p-value: < 2.2e-16
# R-squared: 0.2114, i.e. log(hd) explains about 21.1% of the variance in
# price. R-squared is a goodness-of-fit measure, not a prediction accuracy.
# Inspect the fitted prices for the rows read back into `test`.
View(predict(m2, newdata = test))
# Price regressed on sqrt(hd). The data frame is passed explicitly so the fit
# does not depend on attached or global variables.
m3 <- lm(price ~ sqrt(hd), data = pricehd)
summary(m3)
##
## Call:
## lm(formula = price ~ sqrt(hd), data = pricehd)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1306.15 -371.62 -45.63 309.23 2630.29
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1362.637 22.372 60.91 <2e-16 ***
## sqrt(hd) 43.909 1.096 40.06 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 518.2 on 6257 degrees of freedom
## Multiple R-squared: 0.2041, Adjusted R-squared: 0.204
## F-statistic: 1605 on 1 and 6257 DF, p-value: < 2.2e-16
# Inspect the fitted prices for the rows read back into `test`.
View(predict(m3, newdata = test))
# log(price) regressed on hd. The data frame is passed explicitly so the fit
# does not depend on attached or global variables.
m4 <- lm(log(price) ~ hd, data = pricehd)
summary(m4)
##
## Call:
## lm(formula = log(price) ~ hd, data = pricehd)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.6935 -0.1643 0.0023 0.1588 0.7987
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.493e+00 5.584e-03 1341.9 <2e-16 ***
## hd 4.293e-04 1.139e-05 37.7 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2329 on 6257 degrees of freedom
## Multiple R-squared: 0.1851, Adjusted R-squared: 0.1849
## F-statistic: 1421 on 1 and 6257 DF, p-value: < 2.2e-16
# The response is log(price), so these predictions are on the log scale;
# apply exp() to express them in price units.
p4 <- predict(m4, newdata = test)
# Price regressed on the square of hd.
# Inside a formula `^` is the term-crossing operator, so `price ~ hd^2` is
# parsed as `price ~ hd` and silently refits the baseline model. Wrapping the
# term in I() makes R treat hd^2 as arithmetic. (Use `hd + I(hd^2)` instead if
# a full quadratic with a linear term is wanted.)
# This reuses the name m4, replacing the log(price) model fitted earlier.
m4 <- lm(price ~ I(hd^2), data = pricehd)
summary(m4)
## NOTE: the summary previously pasted here was produced by `price ~ hd^2` and
## was therefore identical to the baseline `price ~ hd` fit (Multiple
## R-squared 0.1851) -- the "no change" was an artifact of the formula, not a
## property of the squared term. Re-run this chunk to capture the output of
## the corrected model.
head(test)
## X hd price pv
## 1 1 80 1499 1894.240
## 2 2 85 1795 1899.073
## 3 3 170 1595 1981.228
## 4 4 170 1849 1981.228
## 5 5 340 3295 2145.539
## 6 6 340 3695 2145.539
# Fitted prices from the squared-hd model; overwrites the earlier p4.
p4 <- predict(m4, newdata = test)