# Download the ozone data set and take a first look at it.
url <- "http://rstatistics.net/wp-content/uploads/2015/09/ozone.csv"
ozone <- read.csv(url)
dim(ozone)
## [1] 366  13
# The linear model and its Cook's distances are fitted once, further down,
# right before they are plotted; fitting them here as well was redundant
# because nothing used the result before it was recomputed.
summary(ozone)
##      Month         Day_of_month    Day_of_week    ozone_reading  
##  Min.   : 1.000   Min.   : 1.00   Min.   :1.000   Min.   : 0.72  
##  1st Qu.: 4.000   1st Qu.: 8.00   1st Qu.:2.000   1st Qu.: 4.94  
##  Median : 7.000   Median :16.00   Median :4.000   Median : 9.35  
##  Mean   : 6.514   Mean   :15.76   Mean   :4.003   Mean   :11.52  
##  3rd Qu.: 9.750   3rd Qu.:23.00   3rd Qu.:6.000   3rd Qu.:16.22  
##  Max.   :12.000   Max.   :31.00   Max.   :7.000   Max.   :37.98  
##                                                   NA's   :5      
##  pressure_height   Wind_speed        Humidity     Temperature_Sandburg
##  Min.   :5320    Min.   : 0.000   Min.   :19.00   Min.   :25.00       
##  1st Qu.:5700    1st Qu.: 3.000   1st Qu.:49.00   1st Qu.:51.00       
##  Median :5770    Median : 5.000   Median :65.00   Median :62.00       
##  Mean   :5753    Mean   : 4.869   Mean   :58.48   Mean   :61.91       
##  3rd Qu.:5830    3rd Qu.: 6.000   3rd Qu.:73.00   3rd Qu.:72.00       
##  Max.   :5950    Max.   :11.000   Max.   :93.00   Max.   :93.00       
##  NA's   :12                       NA's   :15      NA's   :2           
##  Temperature_ElMonte Inversion_base_height Pressure_gradient
##  Min.   :27.68       Min.   : 111          Min.   :-69.0    
##  1st Qu.:49.73       1st Qu.: 890          1st Qu.:-10.0    
##  Median :57.02       Median :2125          Median : 24.0    
##  Mean   :56.85       Mean   :2591          Mean   : 17.8    
##  3rd Qu.:66.11       3rd Qu.:5000          3rd Qu.: 45.0    
##  Max.   :82.58       Max.   :5000          Max.   :107.0    
##  NA's   :139         NA's   :15            NA's   :1        
##  Inversion_temperature   Visibility   
##  Min.   :27.50         Min.   :  0.0  
##  1st Qu.:51.26         1st Qu.: 70.0  
##  Median :62.24         Median :110.0  
##  Mean   :60.93         Mean   :123.3  
##  3rd Qu.:70.52         3rd Qu.:150.0  
##  Max.   :91.76         Max.   :500.0  
##  NA's   :14
# Fit a linear model of ozone_reading on every other column and compute the
# Cook's distance of each observation used in the fit. With R's default
# na.action, rows containing missing values are left out of the fit, so
# cooksd can be shorter than nrow(ozone); its names are the row labels of the
# rows that were kept.
mod <- lm(ozone_reading ~ ., data = ozone)
cooksd <- cooks.distance(mod)

# An observation is flagged as influential when its Cook's distance exceeds
# four times the mean Cook's distance.
cutoff <- 4 * mean(cooksd, na.rm = TRUE)
# which() keeps only positions where the comparison is TRUE, so an NA/NaN
# distance can never turn into an NA row index below.
flagged <- which(cooksd > cutoff)

# Plot Cook's distance per fitted observation.
plot(cooksd, pch = ".", cex = 5, main = "Influential Obs by Cooks distance")
abline(h = cutoff, col = "red")  # add cutoff line
# Label only the flagged points, nudged one unit to the right of the marker.
point_labels <- character(length(cooksd))
point_labels[flagged] <- names(cooksd)[flagged]
text(x = seq_along(cooksd) + 1, y = cooksd, labels = point_labels, col = "red")

# Influential row numbers. The names are converted to numeric positions, which
# relies on ozone having the default 1..n row names produced by read.csv().
influential <- as.numeric(names(cooksd)[flagged])
a1 <- ozone[influential, ]
# Select the complement by position instead of ozone[-influential, ]: a
# negated zero-length index selects no rows at all, so the old form returned
# an empty data frame whenever nothing was flagged.
a2 <- ozone[setdiff(seq_len(nrow(ozone)), influential), ]

dim(a1)
## [1]  9 13
dim(a2)
## [1] 357  13
dim(ozone)
## [1] 366  13