# Source CSV for the airquality data, hosted in the Rdatasets GitHub repository.
theURL <- "https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/datasets/airquality.csv"

# Read the file straight from the URL. read.csv() is read.table() preset for
# comma-separated data (header = TRUE, sep = ","); it also disables "#" comment
# handling and apostrophe quoting, either of which can silently truncate or
# merge fields when a CSV is parsed with read.table()'s defaults.
air <- read.csv(file = theURL)

# Per-column quartiles, mean and NA count for the full data set.
summary(air)
## X Ozone Solar.R Wind
## Min. : 1 Min. : 1.00 Min. : 7.0 Min. : 1.700
## 1st Qu.: 39 1st Qu.: 18.00 1st Qu.:115.8 1st Qu.: 7.400
## Median : 77 Median : 31.50 Median :205.0 Median : 9.700
## Mean : 77 Mean : 42.13 Mean :185.9 Mean : 9.958
## 3rd Qu.:115 3rd Qu.: 63.25 3rd Qu.:258.8 3rd Qu.:11.500
## Max. :153 Max. :168.00 Max. :334.0 Max. :20.700
## NA's :37 NA's :7
## Temp Month Day
## Min. :56.00 Min. :5.000 Min. : 1.0
## 1st Qu.:72.00 1st Qu.:6.000 1st Qu.: 8.0
## Median :79.00 Median :7.000 Median :16.0
## Mean :77.88 Mean :6.993 Mean :15.8
## 3rd Qu.:85.00 3rd Qu.:8.000 3rd Qu.:23.0
## Max. :97.00 Max. :9.000 Max. :31.0
##
#print(typeof(air))
# Central tendency of the two columns that contain missing values in the full
# data set; NA entries are dropped before each statistic is computed.
mean(air[["Solar.R"]], na.rm = TRUE)
## [1] 185.9315
median(air[["Solar.R"]], na.rm = TRUE)
## [1] 205
mean(air[["Ozone"]], na.rm = TRUE)
## [1] 42.12931
median(air[["Ozone"]], na.rm = TRUE)
## [1] 31.5
# Working subset: the first 50 observations and the first 5 columns of `air`.
newAir <- air[seq_len(50), seq_len(5)]
print(newAir)
## X Ozone Solar.R Wind Temp
## 1 1 41 190 7.4 67
## 2 2 36 118 8.0 72
## 3 3 12 149 12.6 74
## 4 4 18 313 11.5 62
## 5 5 NA NA 14.3 56
## 6 6 28 NA 14.9 66
## 7 7 23 299 8.6 65
## 8 8 19 99 13.8 59
## 9 9 8 19 20.1 61
## 10 10 NA 194 8.6 69
## 11 11 7 NA 6.9 74
## 12 12 16 256 9.7 69
## 13 13 11 290 9.2 66
## 14 14 14 274 10.9 68
## 15 15 18 65 13.2 58
## 16 16 14 334 11.5 64
## 17 17 34 307 12.0 66
## 18 18 6 78 18.4 57
## 19 19 30 322 11.5 68
## 20 20 11 44 9.7 62
## 21 21 1 8 9.7 59
## 22 22 11 320 16.6 73
## 23 23 4 25 9.7 61
## 24 24 32 92 12.0 61
## 25 25 NA 66 16.6 57
## 26 26 NA 266 14.9 58
## 27 27 NA NA 8.0 57
## 28 28 23 13 12.0 67
## 29 29 45 252 14.9 81
## 30 30 115 223 5.7 79
## 31 31 37 279 7.4 76
## 32 32 NA 286 8.6 78
## 33 33 NA 287 9.7 74
## 34 34 NA 242 16.1 67
## 35 35 NA 186 9.2 84
## 36 36 NA 220 8.6 85
## 37 37 NA 264 14.3 79
## 38 38 29 127 9.7 82
## 39 39 NA 273 6.9 87
## 40 40 71 291 13.8 90
## 41 41 39 323 11.5 87
## 42 42 NA 259 10.9 93
## 43 43 NA 250 9.2 92
## 44 44 23 148 8.0 82
## 45 45 NA 332 13.8 80
## 46 46 NA 322 11.5 79
## 47 47 21 191 14.9 77
## 48 48 37 284 20.7 72
## 49 49 20 37 9.2 65
## 50 50 12 120 11.5 73
# Give the five subset columns new names (positional, left to right).
names(newAir) <- c("obs", "newOzone", "solar", "w", "t")
print(newAir)
## obs newOzone solar w t
## 1 1 41 190 7.4 67
## 2 2 36 118 8.0 72
## 3 3 12 149 12.6 74
## 4 4 18 313 11.5 62
## 5 5 NA NA 14.3 56
## 6 6 28 NA 14.9 66
## 7 7 23 299 8.6 65
## 8 8 19 99 13.8 59
## 9 9 8 19 20.1 61
## 10 10 NA 194 8.6 69
## 11 11 7 NA 6.9 74
## 12 12 16 256 9.7 69
## 13 13 11 290 9.2 66
## 14 14 14 274 10.9 68
## 15 15 18 65 13.2 58
## 16 16 14 334 11.5 64
## 17 17 34 307 12.0 66
## 18 18 6 78 18.4 57
## 19 19 30 322 11.5 68
## 20 20 11 44 9.7 62
## 21 21 1 8 9.7 59
## 22 22 11 320 16.6 73
## 23 23 4 25 9.7 61
## 24 24 32 92 12.0 61
## 25 25 NA 66 16.6 57
## 26 26 NA 266 14.9 58
## 27 27 NA NA 8.0 57
## 28 28 23 13 12.0 67
## 29 29 45 252 14.9 81
## 30 30 115 223 5.7 79
## 31 31 37 279 7.4 76
## 32 32 NA 286 8.6 78
## 33 33 NA 287 9.7 74
## 34 34 NA 242 16.1 67
## 35 35 NA 186 9.2 84
## 36 36 NA 220 8.6 85
## 37 37 NA 264 14.3 79
## 38 38 29 127 9.7 82
## 39 39 NA 273 6.9 87
## 40 40 71 291 13.8 90
## 41 41 39 323 11.5 87
## 42 42 NA 259 10.9 93
## 43 43 NA 250 9.2 92
## 44 44 23 148 8.0 82
## 45 45 NA 332 13.8 80
## 46 46 NA 322 11.5 79
## 47 47 21 191 14.9 77
## 48 48 37 284 20.7 72
## 49 49 20 37 9.2 65
## 50 50 12 120 11.5 73
# Summary statistics of the renamed 50-row subset, for comparison with the
# full data set.
summary(newAir)
## obs newOzone solar w
## Min. : 1.00 Min. : 1.00 Min. : 8.0 Min. : 5.70
## 1st Qu.:13.25 1st Qu.: 12.00 1st Qu.:118.5 1st Qu.: 9.20
## Median :25.50 Median : 20.50 Median :246.0 Median :11.50
## Mean :25.50 Mean : 25.47 Mean :203.0 Mean :11.57
## 3rd Qu.:37.75 3rd Qu.: 33.50 3rd Qu.:286.8 3rd Qu.:13.80
## Max. :50.00 Max. :115.00 Max. :334.0 Max. :20.70
## NA's :16 NA's :4
## t
## Min. :56.00
## 1st Qu.:62.50
## Median :69.00
## Mean :71.16
## 3rd Qu.:79.00
## Max. :93.00
##
# Mean and median of the two subset columns that contain NAs (NAs dropped).
mean(newAir[["solar"]], na.rm = TRUE)
## [1] 202.9783
median(newAir[["solar"]], na.rm = TRUE)
## [1] 246
mean(newAir[["newOzone"]], na.rm = TRUE)
## [1] 25.47059
median(newAir[["newOzone"]], na.rm = TRUE)
## [1] 20.5
# Show the subset once more before any values are overwritten.
print(newAir)
## obs newOzone solar w t
## 1 1 41 190 7.4 67
## 2 2 36 118 8.0 72
## 3 3 12 149 12.6 74
## 4 4 18 313 11.5 62
## 5 5 NA NA 14.3 56
## 6 6 28 NA 14.9 66
## 7 7 23 299 8.6 65
## 8 8 19 99 13.8 59
## 9 9 8 19 20.1 61
## 10 10 NA 194 8.6 69
## 11 11 7 NA 6.9 74
## 12 12 16 256 9.7 69
## 13 13 11 290 9.2 66
## 14 14 14 274 10.9 68
## 15 15 18 65 13.2 58
## 16 16 14 334 11.5 64
## 17 17 34 307 12.0 66
## 18 18 6 78 18.4 57
## 19 19 30 322 11.5 68
## 20 20 11 44 9.7 62
## 21 21 1 8 9.7 59
## 22 22 11 320 16.6 73
## 23 23 4 25 9.7 61
## 24 24 32 92 12.0 61
## 25 25 NA 66 16.6 57
## 26 26 NA 266 14.9 58
## 27 27 NA NA 8.0 57
## 28 28 23 13 12.0 67
## 29 29 45 252 14.9 81
## 30 30 115 223 5.7 79
## 31 31 37 279 7.4 76
## 32 32 NA 286 8.6 78
## 33 33 NA 287 9.7 74
## 34 34 NA 242 16.1 67
## 35 35 NA 186 9.2 84
## 36 36 NA 220 8.6 85
## 37 37 NA 264 14.3 79
## 38 38 29 127 9.7 82
## 39 39 NA 273 6.9 87
## 40 40 71 291 13.8 90
## 41 41 39 323 11.5 87
## 42 42 NA 259 10.9 93
## 43 43 NA 250 9.2 92
## 44 44 23 148 8.0 82
## 45 45 NA 332 13.8 80
## 46 46 NA 322 11.5 79
## 47 47 21 191 14.9 77
## 48 48 37 284 20.7 72
## 49 49 20 37 9.2 65
## 50 50 12 120 11.5 73
# Overwrite three kinds of newOzone entries with 0 in a single pass:
# missing values, and the readings equal to 41 or 18.
# `%in%` never yields NA, so the combined mask is TRUE/FALSE for every row.
newAir$newOzone <- with(
  newAir,
  replace(newOzone, is.na(newOzone) | newOzone %in% c(41, 18), 0)
)
print(newAir)
## obs newOzone solar w t
## 1 1 0 190 7.4 67
## 2 2 36 118 8.0 72
## 3 3 12 149 12.6 74
## 4 4 0 313 11.5 62
## 5 5 0 NA 14.3 56
## 6 6 28 NA 14.9 66
## 7 7 23 299 8.6 65
## 8 8 19 99 13.8 59
## 9 9 8 19 20.1 61
## 10 10 0 194 8.6 69
## 11 11 7 NA 6.9 74
## 12 12 16 256 9.7 69
## 13 13 11 290 9.2 66
## 14 14 14 274 10.9 68
## 15 15 0 65 13.2 58
## 16 16 14 334 11.5 64
## 17 17 34 307 12.0 66
## 18 18 6 78 18.4 57
## 19 19 30 322 11.5 68
## 20 20 11 44 9.7 62
## 21 21 1 8 9.7 59
## 22 22 11 320 16.6 73
## 23 23 4 25 9.7 61
## 24 24 32 92 12.0 61
## 25 25 0 66 16.6 57
## 26 26 0 266 14.9 58
## 27 27 0 NA 8.0 57
## 28 28 23 13 12.0 67
## 29 29 45 252 14.9 81
## 30 30 115 223 5.7 79
## 31 31 37 279 7.4 76
## 32 32 0 286 8.6 78
## 33 33 0 287 9.7 74
## 34 34 0 242 16.1 67
## 35 35 0 186 9.2 84
## 36 36 0 220 8.6 85
## 37 37 0 264 14.3 79
## 38 38 29 127 9.7 82
## 39 39 0 273 6.9 87
## 40 40 71 291 13.8 90
## 41 41 39 323 11.5 87
## 42 42 0 259 10.9 93
## 43 43 0 250 9.2 92
## 44 44 23 148 8.0 82
## 45 45 0 332 13.8 80
## 46 46 0 322 11.5 79
## 47 47 21 191 14.9 77
## 48 48 37 284 20.7 72
## 49 49 20 37 9.2 65
## 50 50 12 120 11.5 73
# First six rows of the untouched source data ...
head(air, n = 6L)
## X Ozone Solar.R Wind Temp Month Day
## 1 1 41 190 7.4 67 5 1
## 2 2 36 118 8.0 72 5 2
## 3 3 12 149 12.6 74 5 3
## 4 4 18 313 11.5 62 5 4
## 5 5 NA NA 14.3 56 5 5
## 6 6 28 NA 14.9 66 5 6
# ... next to the first six rows of the renamed, modified subset.
head(newAir, n = 6L)
## obs newOzone solar w t
## 1 1 0 190 7.4 67
## 2 2 36 118 8.0 72
## 3 3 12 149 12.6 74
## 4 4 0 313 11.5 62
## 5 5 0 NA 14.3 56
## 6 6 28 NA 14.9 66
# 7. BONUS – Place the original .csv file in a GitHub repository and have R read it from the link. This will be a very useful skill as you progress in your data science education and career.
# Done above: the data is read directly from the original source file's GitHub link.