# Lab 1
# Clear the workspace: remove every object that ls() reports in the global
# environment (names starting with a dot are not listed, so they survive).
# NOTE(review): this does not detach packages or reset options, so it is not a
# true fresh session; consider dropping it and restarting R instead.
rm(list=ls())
# Show the directory the session started in (recorded output below).
getwd()
## [1] "C:/AGZ1/GD_AGZ1117/AGZ_Home/workspace_R/Asif_06_2018"
# Confirm the workspace is now empty.
ls()
## character(0)
# Move to the workspace root so the relative data path used later resolves.
# NOTE(review): machine-specific absolute path; setwd() errors when the
# directory does not exist, so the script only runs on this machine as written.
setwd("C:/AGZ1/GD_AGZ1117/AGZ_Home/workspace_R")
# Attach every package this lab relies on, then report completion.
#
# Takes no arguments. Packages are attached in the listed order, which
# determines which package masks which when names collide. The status string
# is printed and, being the value of print(), is returned invisibly.
LoadLibraries <- function() {
  lab_packages <- c("ISLR", "MASS", "dplyr", "tidyr", "sqldf", "ggplot2")
  for (pkg in lab_packages) {
    # character.only = TRUE makes library() treat `pkg` as a string holding
    # the package name rather than as the literal name "pkg".
    library(pkg, character.only = TRUE)
  }
  print("The libraries have been loaded .")
}
# Attach the packages used by the lab. The lines below are the startup
# messages recorded at load time; note that dplyr masks MASS::select as well
# as stats::filter and stats::lag, so call those with an explicit `pkg::`
# prefix when the masked version is wanted.
LoadLibraries()
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:MASS':
##
## select
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
## [1] "The libraries have been loaded ."
# Check the current working directory before reading files by relative path.
getwd()
## [1] "C:/AGZ1/GD_AGZ1117/AGZ_Home/workspace_R"
# Read the lab data set; the path is relative to the current working directory.
# The recorded output below shows two numeric columns, x and y.
data <- read.csv("data/dataset-1.csv")
head(data)
## x y
## 1 5.325177 11.003381
## 2 -1.017725 1.829188
## 3 9.334208 24.211486
## 4 7.818031 16.930033
## 5 3.161936 10.991406
## 6 -6.296965 -6.661919
# Share of rows that goes into the training set; the rest is held out.
fraction <- 0.8
# Fix the RNG seed so the same rows are drawn on every run (the value itself
# is arbitrary).
set.seed(1)
subset.rows <- sample(nrow(data), floor(nrow(data) * fraction))
training <- data[subset.rows, ]
# Select the held-out rows by positive index. `data[-subset.rows, ]` would
# return zero rows (instead of all rows) if `subset.rows` were empty, because
# negating an empty index vector is still an empty index vector.
testing <- data[setdiff(seq_len(nrow(data)), subset.rows), ]
dim(data)
## [1] 99 2
dim(training)
## [1] 79 2
dim(testing)
## [1] 20 2
summary(data)
## x y
## Min. :-9.6907 Min. :-20.630
## 1st Qu.:-5.2669 1st Qu.: -7.658
## Median :-0.3784 Median : 2.476
## Mean :-0.2800 Mean : 2.843
## 3rd Qu.: 4.2545 3rd Qu.: 13.019
## Max. : 9.9298 Max. : 27.737
# Scatter plot of the response y against the predictor x. The columns are
# looked up through `data =` rather than attach(), so nothing is left on the
# search path and a stray global `x` or `y` cannot shadow the columns.
plot(y ~ x, data = data)

# Simple linear regression of y on x.
# NOTE(review): the model is fit on every row of `data` (97 residual degrees of
# freedom below), not on the `training` split - confirm that is intended.
model <- lm(y ~ x, data = data)
summary(model)
##
## Call:
## lm(formula = y ~ x, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.5966 -1.9969 0.2185 2.0410 4.2002
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.45196 0.24178 14.28 <2e-16 ***
## x 2.17592 0.04093 53.16 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.403 on 97 degrees of freedom
## Multiple R-squared: 0.9668, Adjusted R-squared: 0.9665
## F-statistic: 2826 on 1 and 97 DF, p-value: < 2.2e-16
# Refit the same model and auto-print it to show only the call and the
# coefficient estimates.
lm(formula = y ~ x, data = data)
##
## Call:
## lm(formula = y ~ x, data = data)
##
## Coefficients:
## (Intercept) x
## 3.452 2.176
# Plot the fitted values of `model`; with a single numeric vector, plot()
# draws the values against their index.
plot(fitted(model))

# Print the residual of every observation used in the fit (recorded below).
residuals(model)
## 1 2 3 4 5 6
## -4.03572706 0.59170882 0.44906158 -3.53331717 0.65933290 3.58778939
## 7 8 9 10 11 12
## 3.54629286 -4.06053113 2.02136410 2.85318273 -2.32401804 4.18271806
## 13 14 15 16 17 18
## -4.59663379 0.79126758 3.49138025 -0.82208806 -0.47243498 2.67837278
## 19 20 21 22 23 24
## 1.09524097 -2.25799327 1.14858569 1.52530442 2.09151494 -3.48659143
## 25 26 27 28 29 30
## 0.32073371 1.53499868 -3.33790061 -1.82220766 -4.50022113 -3.00643918
## 31 32 33 34 35 36
## 0.23779328 2.42592540 -0.77503003 0.59615358 -3.16726961 4.20015315
## 37 38 39 40 41 42
## -0.54185728 3.73828500 2.78703616 0.49290206 -2.88272498 1.03847473
## 43 44 45 46 47 48
## 2.07903815 1.12973488 -1.53461004 -3.05760475 -1.48496269 -0.73810076
## 49 50 51 52 53 54
## -1.58121437 -0.76918178 -2.21555130 0.30591902 3.36979405 -3.23005505
## 55 56 57 58 59 60
## 0.21846080 -3.19269741 -0.59593447 -0.01343787 -0.70769795 0.17565969
## 61 62 63 64 65 66
## 0.61701897 3.00589091 0.28631983 3.24279248 -1.38605071 0.63716817
## 67 68 69 70 71 72
## -0.59277774 2.06055361 -1.69066773 -1.69572798 3.31569446 2.10840095
## 73 74 75 76 77 78
## -2.38278008 -2.17160019 -2.51187933 -1.18581227 2.85107947 -2.92556780
## 79 80 81 82 83 84
## -0.62134052 2.56772594 -2.24010073 4.02391986 0.30520777 1.54305529
## 85 86 87 88 89 90
## 3.74893566 -2.84634580 -0.23161659 1.88933241 -1.29307022 1.62688857
## 91 92 93 94 95 96
## 4.12530449 -4.17763891 1.20159094 0.01972528 -3.38833795 2.16245722
## 97 98 99
## -2.83542636 2.69876340 -0.48123029
# Confidence intervals for the intercept and slope of `model`, at confint()'s
# default level; the recorded output shows the 2.5 % and 97.5 % bounds.
confint(model)
## 2.5 % 97.5 %
## (Intercept) 2.972092 3.931836
## x 2.094684 2.257150