# Load the ozone data set from its published CSV (needs network access).
url <- "http://rstatistics.net/wp-content/uploads/2015/09/ozone.csv"
ozone <- read.csv(url)

# First look at the data: dimensions, then per-column summaries.
# The linear model and its Cook's distances are fitted once, right before
# the Cook's distance plot further down, so they are not computed here.
dim(ozone)
## [1] 366 13
summary(ozone)
## Month Day_of_month Day_of_week ozone_reading
## Min. : 1.000 Min. : 1.00 Min. :1.000 Min. : 0.72
## 1st Qu.: 4.000 1st Qu.: 8.00 1st Qu.:2.000 1st Qu.: 4.94
## Median : 7.000 Median :16.00 Median :4.000 Median : 9.35
## Mean : 6.514 Mean :15.76 Mean :4.003 Mean :11.52
## 3rd Qu.: 9.750 3rd Qu.:23.00 3rd Qu.:6.000 3rd Qu.:16.22
## Max. :12.000 Max. :31.00 Max. :7.000 Max. :37.98
## NA's :5
## pressure_height Wind_speed Humidity Temperature_Sandburg
## Min. :5320 Min. : 0.000 Min. :19.00 Min. :25.00
## 1st Qu.:5700 1st Qu.: 3.000 1st Qu.:49.00 1st Qu.:51.00
## Median :5770 Median : 5.000 Median :65.00 Median :62.00
## Mean :5753 Mean : 4.869 Mean :58.48 Mean :61.91
## 3rd Qu.:5830 3rd Qu.: 6.000 3rd Qu.:73.00 3rd Qu.:72.00
## Max. :5950 Max. :11.000 Max. :93.00 Max. :93.00
## NA's :12 NA's :15 NA's :2
## Temperature_ElMonte Inversion_base_height Pressure_gradient
## Min. :27.68 Min. : 111 Min. :-69.0
## 1st Qu.:49.73 1st Qu.: 890 1st Qu.:-10.0
## Median :57.02 Median :2125 Median : 24.0
## Mean :56.85 Mean :2591 Mean : 17.8
## 3rd Qu.:66.11 3rd Qu.:5000 3rd Qu.: 45.0
## Max. :82.58 Max. :5000 Max. :107.0
## NA's :139 NA's :15 NA's :1
## Inversion_temperature Visibility
## Min. :27.50 Min. : 0.0
## 1st Qu.:51.26 1st Qu.: 70.0
## Median :62.24 Median :110.0
## Mean :60.93 Mean :123.3
## 3rd Qu.:70.52 3rd Qu.:150.0
## Max. :91.76 Max. :500.0
## NA's :14
# Regress ozone_reading on every other column, then compute Cook's distance
# for each observation that took part in the fit. Under R's default
# na.action, rows containing NA are dropped by lm(), so cooksd can be
# shorter than nrow(ozone); names(cooksd) keep the original row labels.
mod <- lm(ozone_reading ~ ., data = ozone)
cooksd <- cooks.distance(mod)

# Cutoff used to flag influential observations: 4 times the mean Cook's
# distance. Computed once so the line and the labels always agree.
cooks_cutoff <- 4 * mean(cooksd, na.rm = TRUE)

# Plot Cook's distance per observation, draw the cutoff line, and label
# every point above it with its row name (labels are shifted one unit to
# the right so they do not sit on top of the point).
plot(cooksd, pch = ".", cex = 5, main = "Influential Obs by Cooks distance")
abline(h = cooks_cutoff, col = "red")
text(
  x = seq_along(cooksd) + 1,
  y = cooksd,
  labels = ifelse(cooksd > cooks_cutoff, names(cooksd), ""),
  col = "red"
)

# Row numbers of the observations whose Cook's distance exceeds 4 times the
# mean. which() discards NA comparisons so an undefined Cook's distance
# cannot end up as an NA entry in the row index.
cooks_cutoff <- 4 * mean(cooksd, na.rm = TRUE)
influential <- as.numeric(names(cooksd)[which(cooksd > cooks_cutoff)])

# Split the data: a1 holds the influential rows, a2 everything else.
# a2 is built from a logical mask instead of ozone[-influential, ] because
# negative indexing with an empty vector selects zero rows, which would
# silently empty a2 whenever no observation crosses the cutoff.
a1 <- ozone[influential, ]
a2 <- ozone[!(seq_len(nrow(ozone)) %in% influential), ]
dim(a1)
## [1] 9 13
dim(a2)
## [1] 357 13
dim(ozone)
## [1] 366 13