class, dim, names, str, glimpse, summary
head, tail, print
hist, plot
## X year month measure X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
## 1 1 2014 12 Max.TemperatureF 64 42 51 43 42 45 38 29 49 48 39 39 42 45
## 2 2 2014 12 Mean.TemperatureF 52 38 44 37 34 42 30 24 39 43 36 35 37 39
## 3 3 2014 12 Min.TemperatureF 39 33 37 30 26 38 21 18 29 38 32 31 32 33
## 4 4 2014 12 Max.Dew.PointF 46 40 49 24 37 45 36 28 49 45 37 28 28 29
## 5 5 2014 12 MeanDew.PointF 40 27 42 21 25 40 20 16 41 39 31 27 26 27
## 6 6 2014 12 Min.DewpointF 26 17 24 13 12 36 -3 3 28 37 27 25 24 25
## X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
## 1 42 44 49 44 37 36 36 44 47 46 59 50 52 52 41 30 30
## 2 37 40 45 40 33 32 33 39 45 44 52 44 45 46 36 26 25
## 3 32 35 41 36 29 27 30 33 42 41 44 37 38 40 30 22 20
## 4 33 42 46 34 25 30 30 39 45 46 58 31 34 42 26 10 8
## 5 29 36 41 30 22 24 27 34 42 44 43 29 31 35 20 4 5
## 6 27 30 32 26 20 20 25 25 37 41 29 28 29 27 10 -6 1
## X year month measure X1 X2 X3 X4 X5 X6 X7 X8
## 281 281 2015 12 Mean.Wind.SpeedMPH 6 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 282 282 2015 12 Max.Gust.SpeedMPH 17 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 283 283 2015 12 PrecipitationIn 0.14 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 284 284 2015 12 CloudCover 7 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 285 285 2015 12 Events Rain <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 286 286 2015 12 WindDirDegrees 109 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23
## 281 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 282 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 283 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 284 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 285 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 286 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## X24 X25 X26 X27 X28 X29 X30 X31
## 281 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 282 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 283 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 284 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 285 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 286 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 'data.frame': 286 obs. of 35 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ year : int 2014 2014 2014 2014 2014 2014 2014 2014 2014 2014 ...
## $ month : int 12 12 12 12 12 12 12 12 12 12 ...
## $ measure: chr "Max.TemperatureF" "Mean.TemperatureF" "Min.TemperatureF" "Max.Dew.PointF" ...
## $ X1 : chr "64" "52" "39" "46" ...
## $ X2 : chr "42" "38" "33" "40" ...
## $ X3 : chr "51" "44" "37" "49" ...
## $ X4 : chr "43" "37" "30" "24" ...
## $ X5 : chr "42" "34" "26" "37" ...
## $ X6 : chr "45" "42" "38" "45" ...
## $ X7 : chr "38" "30" "21" "36" ...
## $ X8 : chr "29" "24" "18" "28" ...
## $ X9 : chr "49" "39" "29" "49" ...
## $ X10 : chr "48" "43" "38" "45" ...
## $ X11 : chr "39" "36" "32" "37" ...
## $ X12 : chr "39" "35" "31" "28" ...
## $ X13 : chr "42" "37" "32" "28" ...
## $ X14 : chr "45" "39" "33" "29" ...
## $ X15 : chr "42" "37" "32" "33" ...
## $ X16 : chr "44" "40" "35" "42" ...
## $ X17 : chr "49" "45" "41" "46" ...
## $ X18 : chr "44" "40" "36" "34" ...
## $ X19 : chr "37" "33" "29" "25" ...
## $ X20 : chr "36" "32" "27" "30" ...
## $ X21 : chr "36" "33" "30" "30" ...
## $ X22 : chr "44" "39" "33" "39" ...
## $ X23 : chr "47" "45" "42" "45" ...
## $ X24 : chr "46" "44" "41" "46" ...
## $ X25 : chr "59" "52" "44" "58" ...
## $ X26 : chr "50" "44" "37" "31" ...
## $ X27 : chr "52" "45" "38" "34" ...
## $ X28 : chr "52" "46" "40" "42" ...
## $ X29 : chr "41" "36" "30" "26" ...
## $ X30 : chr "30" "26" "22" "10" ...
## $ X31 : chr "30" "25" "20" "8" ...
# Load the cleaned weather data from its serialized RDS file
weather_clean <- readRDS(file = 'data/weather_clean.rds')
# Show the first six rows of the cleaned data
head(x = weather_clean, n = 6L)
## date events cloud_cover max_dew_point_f max_gust_speed_mph
## 1 2014-12-01 Rain 6 46 29
## 2 2014-12-02 Rain-Snow 7 40 29
## 3 2014-12-03 Rain 8 49 38
## 4 2014-12-04 None 3 24 33
## 5 2014-12-05 Rain 5 37 26
## 6 2014-12-06 Rain 8 45 25
## max_humidity max_sea_level_pressure_in max_temperature_f max_visibility_miles
## 1 74 30.45 64 10
## 2 92 30.71 42 10
## 3 100 30.40 51 10
## 4 69 30.56 43 10
## 5 85 30.68 42 10
## 6 100 30.42 45 10
## max_wind_speed_mph mean_humidity mean_sea_level_pressure_in
## 1 22 63 30.13
## 2 24 72 30.59
## 3 29 79 30.07
## 4 25 54 30.33
## 5 22 66 30.59
## 6 22 93 30.24
## mean_temperature_f mean_visibility_miles mean_wind_speed_mph mean_dew_point_f
## 1 52 10 13 40
## 2 38 8 15 27
## 3 44 5 12 42
## 4 37 10 12 21
## 5 34 10 10 25
## 6 42 4 8 40
## min_dew_point_f min_humidity min_sea_level_pressure_in min_temperature_f
## 1 26 52 30.01 39
## 2 17 51 30.40 33
## 3 24 57 29.87 37
## 4 13 39 30.09 30
## 5 12 47 30.45 26
## 6 36 85 30.16 38
## min_visibility_miles precipitation_in wind_dir_degrees
## 1 10 0.01 268
## 2 2 0.10 62
## 3 1 0.44 254
## 4 10 0.00 292
## 5 5 0.11 61
## 6 0 1.09 313
## date events cloud_cover max_dew_point_f max_gust_speed_mph
## 361 2015-11-26 None 6 49 28
## 362 2015-11-27 None 7 52 32
## 363 2015-11-28 Rain 8 50 23
## 364 2015-11-29 None 4 33 20
## 365 2015-11-30 None 6 26 17
## 366 2015-12-01 Rain 7 43 17
## max_humidity max_sea_level_pressure_in max_temperature_f
## 361 100 30.87 59
## 362 100 30.63 64
## 363 93 30.20 60
## 364 79 30.42 44
## 365 75 30.53 38
## 366 96 30.40 45
## max_visibility_miles max_wind_speed_mph mean_humidity
## 361 10 22 79
## 362 10 26 78
## 363 10 18 80
## 364 10 16 58
## 365 10 14 65
## 366 10 15 83
## mean_sea_level_pressure_in mean_temperature_f mean_visibility_miles
## 361 30.77 49 9
## 362 30.41 56 9
## 363 30.16 51 9
## 364 30.26 38 10
## 365 30.46 33 10
## 366 30.24 39 8
## mean_wind_speed_mph mean_dew_point_f min_dew_point_f min_humidity
## 361 10 42 34 57
## 362 14 49 47 56
## 363 10 43 36 67
## 364 10 23 15 36
## 365 9 23 18 54
## 366 6 35 25 69
## min_sea_level_pressure_in min_temperature_f min_visibility_miles
## 361 30.64 38 5
## 362 30.15 48 5
## 363 30.11 41 4
## 364 30.19 32 10
## 365 30.39 28 10
## 366 30.01 32 1
## precipitation_in wind_dir_degrees
## 361 0.00 180
## 362 0.00 209
## 363 0.21 358
## 364 0.00 326
## 365 0.00 65
## 366 0.14 109
## 'data.frame': 366 obs. of 23 variables:
## $ date : POSIXct, format: "2014-12-01" "2014-12-02" ...
## $ events : chr "Rain" "Rain-Snow" "Rain" "None" ...
## $ cloud_cover : num 6 7 8 3 5 8 6 8 8 8 ...
## $ max_dew_point_f : num 46 40 49 24 37 45 36 28 49 45 ...
## $ max_gust_speed_mph : num 29 29 38 33 26 25 32 28 52 29 ...
## $ max_humidity : num 74 92 100 69 85 100 92 92 100 100 ...
## $ max_sea_level_pressure_in : num 30.4 30.7 30.4 30.6 30.7 ...
## $ max_temperature_f : num 64 42 51 43 42 45 38 29 49 48 ...
## $ max_visibility_miles : num 10 10 10 10 10 10 10 10 10 10 ...
## $ max_wind_speed_mph : num 22 24 29 25 22 22 25 21 38 23 ...
## $ mean_humidity : num 63 72 79 54 66 93 61 70 93 95 ...
## $ mean_sea_level_pressure_in: num 30.1 30.6 30.1 30.3 30.6 ...
## $ mean_temperature_f : num 52 38 44 37 34 42 30 24 39 43 ...
## $ mean_visibility_miles : num 10 8 5 10 10 4 10 8 2 3 ...
## $ mean_wind_speed_mph : num 13 15 12 12 10 8 15 13 20 13 ...
## $ mean_dew_point_f : num 40 27 42 21 25 40 20 16 41 39 ...
## $ min_dew_point_f : num 26 17 24 13 12 36 -3 3 28 37 ...
## $ min_humidity : num 52 51 57 39 47 85 29 47 86 89 ...
## $ min_sea_level_pressure_in : num 30 30.4 29.9 30.1 30.4 ...
## $ min_temperature_f : num 39 33 37 30 26 38 21 18 29 38 ...
## $ min_visibility_miles : num 10 2 1 10 5 0 5 2 1 1 ...
## $ precipitation_in : num 0.01 0.1 0.44 0 0.11 1.09 0.13 0.03 2.9 0.28 ...
## $ wind_dir_degrees : num 268 62 254 292 61 313 350 354 38 357 ...
class, dim, names, str, glimpse, summary
## [1] "data.frame"
## [1] 199 30
## [1] "Country" "Y1980" "Y1981" "Y1982" "Y1983" "Y1984" "Y1985"
## [8] "Y1986" "Y1987" "Y1988" "Y1989" "Y1990" "Y1991" "Y1992"
## [15] "Y1993" "Y1994" "Y1995" "Y1996" "Y1997" "Y1998" "Y1999"
## [22] "Y2000" "Y2001" "Y2002" "Y2003" "Y2004" "Y2005" "Y2006"
## [29] "Y2007" "Y2008"
## 'data.frame': 199 obs. of 30 variables:
## $ Country: chr "Afghanistan" "Albania" "Algeria" "Andorra" ...
## $ Y1980 : num 21.5 25.2 22.3 25.7 20.9 ...
## $ Y1981 : num 21.5 25.2 22.3 25.7 20.9 ...
## $ Y1982 : num 21.5 25.3 22.4 25.7 20.9 ...
## $ Y1983 : num 21.4 25.3 22.5 25.8 20.9 ...
## $ Y1984 : num 21.4 25.3 22.6 25.8 20.9 ...
## $ Y1985 : num 21.4 25.3 22.7 25.9 20.9 ...
## $ Y1986 : num 21.4 25.3 22.8 25.9 21 ...
## $ Y1987 : num 21.4 25.3 22.8 25.9 21 ...
## $ Y1988 : num 21.3 25.3 22.9 26 21 ...
## $ Y1989 : num 21.3 25.3 23 26 21.1 ...
## $ Y1990 : num 21.2 25.3 23 26.1 21.1 ...
## $ Y1991 : num 21.2 25.3 23.1 26.2 21.1 ...
## $ Y1992 : num 21.1 25.2 23.2 26.2 21.1 ...
## $ Y1993 : num 21.1 25.2 23.3 26.3 21.1 ...
## $ Y1994 : num 21 25.2 23.3 26.4 21.1 ...
## $ Y1995 : num 20.9 25.3 23.4 26.4 21.2 ...
## $ Y1996 : num 20.9 25.3 23.5 26.5 21.2 ...
## $ Y1997 : num 20.8 25.3 23.5 26.6 21.2 ...
## $ Y1998 : num 20.8 25.4 23.6 26.7 21.3 ...
## $ Y1999 : num 20.8 25.5 23.7 26.8 21.3 ...
## $ Y2000 : num 20.7 25.6 23.8 26.8 21.4 ...
## $ Y2001 : num 20.6 25.7 23.9 26.9 21.4 ...
## $ Y2002 : num 20.6 25.8 24 27 21.5 ...
## $ Y2003 : num 20.6 25.9 24.1 27.1 21.6 ...
## $ Y2004 : num 20.6 26 24.2 27.2 21.7 ...
## $ Y2005 : num 20.6 26.1 24.3 27.3 21.8 ...
## $ Y2006 : num 20.6 26.2 24.4 27.4 21.9 ...
## $ Y2007 : num 20.6 26.3 24.5 27.5 22.1 ...
## $ Y2008 : num 20.6 26.4 24.6 27.6 22.3 ...
## Observations: 199
## Variables: 30
## $ Country <chr> "Afghanistan", "Albania", "Algeria", "Andorra", "Angola", "...
## $ Y1980 <dbl> 21.48678, 25.22533, 22.25703, 25.66652, 20.94876, 23.31424,...
## $ Y1981 <dbl> 21.46552, 25.23981, 22.34745, 25.70868, 20.94371, 23.39054,...
## $ Y1982 <dbl> 21.45145, 25.25636, 22.43647, 25.74681, 20.93754, 23.45883,...
## $ Y1983 <dbl> 21.43822, 25.27176, 22.52105, 25.78250, 20.93187, 23.53735,...
## $ Y1984 <dbl> 21.42734, 25.27901, 22.60633, 25.81874, 20.93569, 23.63584,...
## $ Y1985 <dbl> 21.41222, 25.28669, 22.69501, 25.85236, 20.94857, 23.73109,...
## $ Y1986 <dbl> 21.40132, 25.29451, 22.76979, 25.89089, 20.96030, 23.83449,...
## $ Y1987 <dbl> 21.37679, 25.30217, 22.84096, 25.93414, 20.98025, 23.93649,...
## $ Y1988 <dbl> 21.34018, 25.30450, 22.90644, 25.98477, 21.01375, 24.05364,...
## $ Y1989 <dbl> 21.29845, 25.31944, 22.97931, 26.04450, 21.05269, 24.16347,...
## $ Y1990 <dbl> 21.24818, 25.32357, 23.04600, 26.10936, 21.09007, 24.26782,...
## $ Y1991 <dbl> 21.20269, 25.28452, 23.11333, 26.17912, 21.12136, 24.36568,...
## $ Y1992 <dbl> 21.14238, 25.23077, 23.18776, 26.24017, 21.14987, 24.45644,...
## $ Y1993 <dbl> 21.06376, 25.21192, 23.25764, 26.30356, 21.13938, 24.54096,...
## $ Y1994 <dbl> 20.97987, 25.22115, 23.32273, 26.36793, 21.14186, 24.60945,...
## $ Y1995 <dbl> 20.91132, 25.25874, 23.39526, 26.43569, 21.16022, 24.66461,...
## $ Y1996 <dbl> 20.85155, 25.31097, 23.46811, 26.50769, 21.19076, 24.72544,...
## $ Y1997 <dbl> 20.81307, 25.33988, 23.54160, 26.58255, 21.22621, 24.78714,...
## $ Y1998 <dbl> 20.78591, 25.39116, 23.61592, 26.66337, 21.27082, 24.84936,...
## $ Y1999 <dbl> 20.75469, 25.46555, 23.69486, 26.75078, 21.31954, 24.91721,...
## $ Y2000 <dbl> 20.69521, 25.55835, 23.77659, 26.83179, 21.37480, 24.99158,...
## $ Y2001 <dbl> 20.62643, 25.66701, 23.86256, 26.92373, 21.43664, 25.05857,...
## $ Y2002 <dbl> 20.59848, 25.77167, 23.95294, 27.02525, 21.51765, 25.13039,...
## $ Y2003 <dbl> 20.58706, 25.87274, 24.05243, 27.12481, 21.59924, 25.20713,...
## $ Y2004 <dbl> 20.57759, 25.98136, 24.15957, 27.23107, 21.69218, 25.29898,...
## $ Y2005 <dbl> 20.58084, 26.08939, 24.27001, 27.32827, 21.80564, 25.39965,...
## $ Y2006 <dbl> 20.58749, 26.20867, 24.38270, 27.43588, 21.93881, 25.51382,...
## $ Y2007 <dbl> 20.60246, 26.32753, 24.48846, 27.53363, 22.08962, 25.64247,...
## $ Y2008 <dbl> 20.62058, 26.44657, 24.59620, 27.63048, 22.25083, 25.76602,...
## Country Y1980 Y1981 Y1982
## Length:199 Min. :19.01 Min. :19.04 Min. :19.07
## Class :character 1st Qu.:21.27 1st Qu.:21.31 1st Qu.:21.36
## Mode :character Median :23.31 Median :23.39 Median :23.46
## Mean :23.15 Mean :23.21 Mean :23.26
## 3rd Qu.:24.82 3rd Qu.:24.89 3rd Qu.:24.94
## Max. :28.12 Max. :28.36 Max. :28.58
## Y1983 Y1984 Y1985 Y1986
## Min. :19.10 Min. :19.13 Min. :19.16 Min. :19.20
## 1st Qu.:21.42 1st Qu.:21.45 1st Qu.:21.47 1st Qu.:21.49
## Median :23.57 Median :23.64 Median :23.73 Median :23.82
## Mean :23.32 Mean :23.37 Mean :23.42 Mean :23.48
## 3rd Qu.:25.02 3rd Qu.:25.06 3rd Qu.:25.11 3rd Qu.:25.20
## Max. :28.82 Max. :29.05 Max. :29.28 Max. :29.52
## Y1987 Y1988 Y1989 Y1990
## Min. :19.23 Min. :19.27 Min. :19.31 Min. :19.35
## 1st Qu.:21.50 1st Qu.:21.52 1st Qu.:21.55 1st Qu.:21.57
## Median :23.87 Median :23.93 Median :24.03 Median :24.14
## Mean :23.53 Mean :23.59 Mean :23.65 Mean :23.71
## 3rd Qu.:25.27 3rd Qu.:25.34 3rd Qu.:25.37 3rd Qu.:25.39
## Max. :29.75 Max. :29.98 Max. :30.20 Max. :30.42
## Y1991 Y1992 Y1993 Y1994
## Min. :19.40 Min. :19.45 Min. :19.51 Min. :19.59
## 1st Qu.:21.60 1st Qu.:21.65 1st Qu.:21.74 1st Qu.:21.76
## Median :24.20 Median :24.19 Median :24.27 Median :24.36
## Mean :23.76 Mean :23.82 Mean :23.88 Mean :23.94
## 3rd Qu.:25.42 3rd Qu.:25.48 3rd Qu.:25.54 3rd Qu.:25.62
## Max. :30.64 Max. :30.85 Max. :31.04 Max. :31.23
## Y1995 Y1996 Y1997 Y1998
## Min. :19.67 Min. :19.71 Min. :19.74 Min. :19.77
## 1st Qu.:21.83 1st Qu.:21.89 1st Qu.:21.94 1st Qu.:22.00
## Median :24.41 Median :24.42 Median :24.50 Median :24.49
## Mean :24.00 Mean :24.07 Mean :24.14 Mean :24.21
## 3rd Qu.:25.70 3rd Qu.:25.78 3rd Qu.:25.85 3rd Qu.:25.94
## Max. :31.41 Max. :31.59 Max. :31.77 Max. :31.95
## Y1999 Y2000 Y2001 Y2002
## Min. :19.80 Min. :19.83 Min. :19.86 Min. :19.84
## 1st Qu.:22.04 1st Qu.:22.12 1st Qu.:22.22 1st Qu.:22.29
## Median :24.61 Median :24.66 Median :24.73 Median :24.81
## Mean :24.29 Mean :24.36 Mean :24.44 Mean :24.52
## 3rd Qu.:26.01 3rd Qu.:26.09 3rd Qu.:26.19 3rd Qu.:26.30
## Max. :32.13 Max. :32.32 Max. :32.51 Max. :32.70
## Y2003 Y2004 Y2005 Y2006
## Min. :19.81 Min. :19.79 Min. :19.79 Min. :19.80
## 1st Qu.:22.37 1st Qu.:22.45 1st Qu.:22.54 1st Qu.:22.63
## Median :24.89 Median :25.00 Median :25.11 Median :25.24
## Mean :24.61 Mean :24.70 Mean :24.79 Mean :24.89
## 3rd Qu.:26.38 3rd Qu.:26.47 3rd Qu.:26.53 3rd Qu.:26.59
## Max. :32.90 Max. :33.10 Max. :33.30 Max. :33.49
## Y2007 Y2008
## Min. :19.83 Min. :19.87
## 1st Qu.:22.73 1st Qu.:22.83
## Median :25.36 Median :25.50
## Mean :24.99 Mean :25.10
## 3rd Qu.:26.66 3rd Qu.:26.82
## Max. :33.69 Max. :33.90
head, tail, print
hist, plot
# Print bmi to the console
# Don't do this; it just wastes space. Use the head, tail, str, and summary functions instead.
# bmi
# Preview only the first six rows instead of printing the whole data frame
head(x = bmi, n = 6L)
## Country Y1980 Y1981 Y1982 Y1983 Y1984 Y1985
## 1 Afghanistan 21.48678 21.46552 21.45145 21.43822 21.42734 21.41222
## 2 Albania 25.22533 25.23981 25.25636 25.27176 25.27901 25.28669
## 3 Algeria 22.25703 22.34745 22.43647 22.52105 22.60633 22.69501
## 4 Andorra 25.66652 25.70868 25.74681 25.78250 25.81874 25.85236
## 5 Angola 20.94876 20.94371 20.93754 20.93187 20.93569 20.94857
## 6 Antigua and Barbuda 23.31424 23.39054 23.45883 23.53735 23.63584 23.73109
## Y1986 Y1987 Y1988 Y1989 Y1990 Y1991 Y1992 Y1993
## 1 21.40132 21.37679 21.34018 21.29845 21.24818 21.20269 21.14238 21.06376
## 2 25.29451 25.30217 25.30450 25.31944 25.32357 25.28452 25.23077 25.21192
## 3 22.76979 22.84096 22.90644 22.97931 23.04600 23.11333 23.18776 23.25764
## 4 25.89089 25.93414 25.98477 26.04450 26.10936 26.17912 26.24017 26.30356
## 5 20.96030 20.98025 21.01375 21.05269 21.09007 21.12136 21.14987 21.13938
## 6 23.83449 23.93649 24.05364 24.16347 24.26782 24.36568 24.45644 24.54096
## Y1994 Y1995 Y1996 Y1997 Y1998 Y1999 Y2000 Y2001
## 1 20.97987 20.91132 20.85155 20.81307 20.78591 20.75469 20.69521 20.62643
## 2 25.22115 25.25874 25.31097 25.33988 25.39116 25.46555 25.55835 25.66701
## 3 23.32273 23.39526 23.46811 23.54160 23.61592 23.69486 23.77659 23.86256
## 4 26.36793 26.43569 26.50769 26.58255 26.66337 26.75078 26.83179 26.92373
## 5 21.14186 21.16022 21.19076 21.22621 21.27082 21.31954 21.37480 21.43664
## 6 24.60945 24.66461 24.72544 24.78714 24.84936 24.91721 24.99158 25.05857
## Y2002 Y2003 Y2004 Y2005 Y2006 Y2007 Y2008
## 1 20.59848 20.58706 20.57759 20.58084 20.58749 20.60246 20.62058
## 2 25.77167 25.87274 25.98136 26.08939 26.20867 26.32753 26.44657
## 3 23.95294 24.05243 24.15957 24.27001 24.38270 24.48846 24.59620
## 4 27.02525 27.12481 27.23107 27.32827 27.43588 27.53363 27.63048
## 5 21.51765 21.59924 21.69218 21.80564 21.93881 22.08962 22.25083
## 6 25.13039 25.20713 25.29898 25.39965 25.51382 25.64247 25.76602
## Country Y1980 Y1981 Y1982 Y1983 Y1984 Y1985
## 1 Afghanistan 21.48678 21.46552 21.45145 21.43822 21.42734 21.41222
## 2 Albania 25.22533 25.23981 25.25636 25.27176 25.27901 25.28669
## 3 Algeria 22.25703 22.34745 22.43647 22.52105 22.60633 22.69501
## 4 Andorra 25.66652 25.70868 25.74681 25.78250 25.81874 25.85236
## 5 Angola 20.94876 20.94371 20.93754 20.93187 20.93569 20.94857
## 6 Antigua and Barbuda 23.31424 23.39054 23.45883 23.53735 23.63584 23.73109
## 7 Argentina 25.37913 25.44951 25.50242 25.55644 25.61271 25.66593
## 8 Armenia 23.82469 23.86401 23.91023 23.95649 24.00181 24.04083
## 9 Australia 24.92729 25.00216 25.07660 25.14938 25.22894 25.31849
## 10 Austria 24.84097 24.88110 24.93482 24.98118 25.02208 25.06015
## 11 Azerbaijan 24.49375 24.52584 24.56064 24.60150 24.64121 24.67566
## 12 Bahamas 24.21064 24.30814 24.42750 24.54415 24.66558 24.78408
## 13 Bahrain 23.97588 24.09045 24.20617 24.32335 24.43174 24.53684
## 14 Bangladesh 20.51918 20.47766 20.43741 20.40075 20.36524 20.32983
## 15 Barbados 24.36372 24.43455 24.49314 24.54713 24.59913 24.64998
## Y1986 Y1987 Y1988 Y1989 Y1990 Y1991 Y1992 Y1993
## 1 21.40132 21.37679 21.34018 21.29845 21.24818 21.20269 21.14238 21.06376
## 2 25.29451 25.30217 25.30450 25.31944 25.32357 25.28452 25.23077 25.21192
## 3 22.76979 22.84096 22.90644 22.97931 23.04600 23.11333 23.18776 23.25764
## 4 25.89089 25.93414 25.98477 26.04450 26.10936 26.17912 26.24017 26.30356
## 5 20.96030 20.98025 21.01375 21.05269 21.09007 21.12136 21.14987 21.13938
## 6 23.83449 23.93649 24.05364 24.16347 24.26782 24.36568 24.45644 24.54096
## 7 25.72364 25.78529 25.84428 25.88510 25.92482 25.99177 26.07642 26.17288
## 8 24.08736 24.13334 24.17219 24.19556 24.20618 24.19790 24.12982 24.05854
## 9 25.41017 25.50528 25.60001 25.70050 25.80568 25.90295 26.00624 26.10586
## 10 25.10680 25.14747 25.19333 25.24928 25.30882 25.37186 25.43668 25.50507
## 11 24.71906 24.75799 24.78894 24.82277 24.83167 24.83972 24.81781 24.76250
## 12 24.90724 25.03166 25.14778 25.26173 25.35641 25.44039 25.51294 25.58479
## 13 24.63328 24.74914 24.86604 24.98644 25.11479 25.25103 25.40173 25.56146
## 14 20.29654 20.26401 20.23497 20.20736 20.18246 20.15921 20.14118 20.12952
## 15 24.71728 24.77976 24.84265 24.90790 24.96113 25.00859 25.05249 25.09414
## Y1994 Y1995 Y1996 Y1997 Y1998 Y1999 Y2000 Y2001
## 1 20.97987 20.91132 20.85155 20.81307 20.78591 20.75469 20.69521 20.62643
## 2 25.22115 25.25874 25.31097 25.33988 25.39116 25.46555 25.55835 25.66701
## 3 23.32273 23.39526 23.46811 23.54160 23.61592 23.69486 23.77659 23.86256
## 4 26.36793 26.43569 26.50769 26.58255 26.66337 26.75078 26.83179 26.92373
## 5 21.14186 21.16022 21.19076 21.22621 21.27082 21.31954 21.37480 21.43664
## 6 24.60945 24.66461 24.72544 24.78714 24.84936 24.91721 24.99158 25.05857
## 7 26.27872 26.37522 26.47182 26.57778 26.68714 26.79005 26.88103 26.96067
## 8 24.02297 24.01570 24.02627 24.03885 24.07100 24.11699 24.18045 24.26670
## 9 26.20077 26.29241 26.38256 26.47351 26.56314 26.65506 26.74486 26.84397
## 10 25.56626 25.61814 25.66410 25.71737 25.75996 25.81773 25.87471 25.93806
## 11 24.69113 24.61946 24.55527 24.49745 24.47179 24.47842 24.51287 24.57202
## 12 25.65514 25.72418 25.79938 25.89374 25.99417 26.12080 26.25748 26.38653
## 13 25.71611 25.87566 26.03138 26.18600 26.34294 26.50245 26.65409 26.80388
## 14 20.11823 20.10770 20.10489 20.11304 20.12622 20.13361 20.14774 20.16802
## 15 25.14401 25.20411 25.26850 25.35236 25.42067 25.51681 25.60292 25.68910
## Y2002 Y2003 Y2004 Y2005 Y2006 Y2007 Y2008
## 1 20.59848 20.58706 20.57759 20.58084 20.58749 20.60246 20.62058
## 2 25.77167 25.87274 25.98136 26.08939 26.20867 26.32753 26.44657
## 3 23.95294 24.05243 24.15957 24.27001 24.38270 24.48846 24.59620
## 4 27.02525 27.12481 27.23107 27.32827 27.43588 27.53363 27.63048
## 5 21.51765 21.59924 21.69218 21.80564 21.93881 22.08962 22.25083
## 6 25.13039 25.20713 25.29898 25.39965 25.51382 25.64247 25.76602
## 7 26.99882 27.04738 27.11001 27.18941 27.28179 27.38889 27.50170
## 8 24.37698 24.50332 24.64178 24.81447 24.99160 25.17590 25.35542
## 9 26.93858 27.03801 27.13871 27.24614 27.35267 27.45878 27.56373
## 10 25.99583 26.06356 26.14360 26.21107 26.29374 26.38136 26.46741
## 11 24.66021 24.77164 24.89376 25.06256 25.25706 25.45513 25.65117
## 12 26.51184 26.62607 26.75612 26.88517 27.00715 27.12653 27.24594
## 13 26.94923 27.09298 27.23908 27.38693 27.53868 27.68865 27.83721
## 14 20.18621 20.20948 20.23957 20.27648 20.31554 20.35493 20.39742
## 15 25.77615 25.87020 25.95660 26.06074 26.16874 26.27575 26.38439
## Country Y1980 Y1981 Y1982 Y1983 Y1984 Y1985
## 194 Venezuela 24.58052 24.69666 24.80082 24.89208 24.98440 25.07104
## 195 Vietnam 19.01394 19.03902 19.06804 19.09675 19.13046 19.16397
## 196 West Bank and Gaza 24.31624 24.40192 24.48713 24.57107 24.65582 24.74148
## 197 Yemen, Rep. 22.90384 22.96813 23.02669 23.07279 23.12566 23.16944
## 198 Zambia 19.66295 19.69512 19.72538 19.75420 19.78070 19.80335
## 199 Zimbabwe 21.46989 21.48867 21.50738 21.52936 21.53383 21.54341
## Y1986 Y1987 Y1988 Y1989 Y1990 Y1991 Y1992 Y1993
## 194 25.15587 25.24624 25.35274 25.43322 25.52678 25.63444 25.75528 25.87036
## 195 19.19740 19.23481 19.27090 19.31105 19.35150 19.39625 19.45212 19.51493
## 196 24.82984 24.91615 25.00108 25.08593 25.17834 25.27187 25.37683 25.48893
## 197 23.20933 23.25043 23.29401 23.33879 23.38236 23.42152 23.46544 23.51371
## 198 19.82396 19.85065 19.88320 19.92451 19.96680 20.00746 20.04096 20.07781
## 199 21.54859 21.54590 21.55396 21.56903 21.58005 21.59694 21.59010 21.58547
## Y1994 Y1995 Y1996 Y1997 Y1998 Y1999 Y2000 Y2001
## 194 25.97218 26.08046 26.18272 26.29177 26.40105 26.50035 26.61021 26.71688
## 195 19.58757 19.66996 19.75854 19.84794 19.93580 20.02081 20.10343 20.18623
## 196 25.61249 25.73496 25.87428 26.01468 26.15144 26.28240 26.39074 26.45700
## 197 23.56154 23.61684 23.66973 23.72737 23.79152 23.85482 23.92467 23.99129
## 198 20.09502 20.09977 20.11009 20.12375 20.13349 20.15094 20.17261 20.20266
## 199 21.59029 21.58986 21.60362 21.62721 21.65496 21.68873 21.72652 21.76514
## Y2002 Y2003 Y2004 Y2005 Y2006 Y2007 Y2008
## 194 26.79210 26.85498 26.95162 27.05633 27.17698 27.30849 27.44500
## 195 20.27145 20.36402 20.46585 20.57277 20.68655 20.80189 20.91630
## 196 26.48925 26.51152 26.52924 26.54329 26.54449 26.55460 26.57750
## 197 24.05692 24.12459 24.19204 24.25638 24.32120 24.37949 24.44157
## 198 20.24298 20.29474 20.35966 20.43398 20.51422 20.59770 20.68321
## 199 21.79645 21.82499 21.85806 21.89495 21.93371 21.97405 22.02660
## Country Y1980 Y1981 Y1982 Y1983 Y1984 Y1985
## 190 United States 25.46406 25.57524 25.67883 25.78812 25.90690 26.02568
## 191 Uruguay 24.24001 24.31948 24.39260 24.44209 24.49525 24.54516
## 192 Uzbekistan 24.56500 24.60077 24.62187 24.64780 24.66890 24.69832
## 193 Vanuatu 23.20701 23.32990 23.46016 23.60431 23.75134 23.89466
## 194 Venezuela 24.58052 24.69666 24.80082 24.89208 24.98440 25.07104
## 195 Vietnam 19.01394 19.03902 19.06804 19.09675 19.13046 19.16397
## 196 West Bank and Gaza 24.31624 24.40192 24.48713 24.57107 24.65582 24.74148
## 197 Yemen, Rep. 22.90384 22.96813 23.02669 23.07279 23.12566 23.16944
## 198 Zambia 19.66295 19.69512 19.72538 19.75420 19.78070 19.80335
## 199 Zimbabwe 21.46989 21.48867 21.50738 21.52936 21.53383 21.54341
## Y1986 Y1987 Y1988 Y1989 Y1990 Y1991 Y1992 Y1993
## 190 26.13740 26.25939 26.37687 26.49269 26.60827 26.71961 26.83609 26.95163
## 191 24.59804 24.67024 24.73972 24.80593 24.86952 24.95229 25.05527 25.15742
## 192 24.72305 24.74603 24.77115 24.78287 24.78404 24.77866 24.75026 24.73165
## 193 24.03171 24.15571 24.27529 24.39727 24.52964 24.66287 24.79143 24.92541
## 194 25.15587 25.24624 25.35274 25.43322 25.52678 25.63444 25.75528 25.87036
## 195 19.19740 19.23481 19.27090 19.31105 19.35150 19.39625 19.45212 19.51493
## 196 24.82984 24.91615 25.00108 25.08593 25.17834 25.27187 25.37683 25.48893
## 197 23.20933 23.25043 23.29401 23.33879 23.38236 23.42152 23.46544 23.51371
## 198 19.82396 19.85065 19.88320 19.92451 19.96680 20.00746 20.04096 20.07781
## 199 21.54859 21.54590 21.55396 21.56903 21.58005 21.59694 21.59010 21.58547
## Y1994 Y1995 Y1996 Y1997 Y1998 Y1999 Y2000 Y2001
## 190 27.06838 27.17810 27.28376 27.39366 27.49846 27.60386 27.71039 27.80569
## 191 25.26958 25.36804 25.47642 25.58248 25.68818 25.78625 25.86898 25.93469
## 192 24.71118 24.69517 24.67727 24.68203 24.69876 24.72082 24.75326 24.79418
## 193 25.05856 25.19282 25.32325 25.45811 25.59565 25.72398 25.85208 25.96032
## 194 25.97218 26.08046 26.18272 26.29177 26.40105 26.50035 26.61021 26.71688
## 195 19.58757 19.66996 19.75854 19.84794 19.93580 20.02081 20.10343 20.18623
## 196 25.61249 25.73496 25.87428 26.01468 26.15144 26.28240 26.39074 26.45700
## 197 23.56154 23.61684 23.66973 23.72737 23.79152 23.85482 23.92467 23.99129
## 198 20.09502 20.09977 20.11009 20.12375 20.13349 20.15094 20.17261 20.20266
## 199 21.59029 21.58986 21.60362 21.62721 21.65496 21.68873 21.72652 21.76514
## Y2002 Y2003 Y2004 Y2005 Y2006 Y2007 Y2008
## 190 27.90479 28.00041 28.10039 28.19703 28.28959 28.37574 28.45698
## 191 25.96627 26.00585 26.06073 26.13136 26.20624 26.29256 26.39123
## 192 24.83998 24.88965 24.95455 25.03331 25.12717 25.22226 25.32054
## 193 26.05661 26.16060 26.27087 26.38887 26.51376 26.64903 26.78926
## 194 26.79210 26.85498 26.95162 27.05633 27.17698 27.30849 27.44500
## 195 20.27145 20.36402 20.46585 20.57277 20.68655 20.80189 20.91630
## 196 26.48925 26.51152 26.52924 26.54329 26.54449 26.55460 26.57750
## 197 24.05692 24.12459 24.19204 24.25638 24.32120 24.37949 24.44157
## 198 20.24298 20.29474 20.35966 20.43398 20.51422 20.59770 20.68321
## 199 21.79645 21.82499 21.85806 21.89495 21.93371 21.97405 22.02660
gather combines multiple columns into two columns, a key column and a value column
spread moves key-value columns into multiple columns, with the keys as the column names
separate splits one column into multiple columns at _ or whatever separator you choose
unite combines multiple columns into one column, with _ as the default separator
## Country Y1980 Y1981 Y1982 Y1983 Y1984 Y1985
## 1 Afghanistan 21.48678 21.46552 21.45145 21.43822 21.42734 21.41222
## 2 Albania 25.22533 25.23981 25.25636 25.27176 25.27901 25.28669
## 3 Algeria 22.25703 22.34745 22.43647 22.52105 22.60633 22.69501
## 4 Andorra 25.66652 25.70868 25.74681 25.78250 25.81874 25.85236
## 5 Angola 20.94876 20.94371 20.93754 20.93187 20.93569 20.94857
## 6 Antigua and Barbuda 23.31424 23.39054 23.45883 23.53735 23.63584 23.73109
## Y1986 Y1987 Y1988 Y1989 Y1990 Y1991 Y1992 Y1993
## 1 21.40132 21.37679 21.34018 21.29845 21.24818 21.20269 21.14238 21.06376
## 2 25.29451 25.30217 25.30450 25.31944 25.32357 25.28452 25.23077 25.21192
## 3 22.76979 22.84096 22.90644 22.97931 23.04600 23.11333 23.18776 23.25764
## 4 25.89089 25.93414 25.98477 26.04450 26.10936 26.17912 26.24017 26.30356
## 5 20.96030 20.98025 21.01375 21.05269 21.09007 21.12136 21.14987 21.13938
## 6 23.83449 23.93649 24.05364 24.16347 24.26782 24.36568 24.45644 24.54096
## Y1994 Y1995 Y1996 Y1997 Y1998 Y1999 Y2000 Y2001
## 1 20.97987 20.91132 20.85155 20.81307 20.78591 20.75469 20.69521 20.62643
## 2 25.22115 25.25874 25.31097 25.33988 25.39116 25.46555 25.55835 25.66701
## 3 23.32273 23.39526 23.46811 23.54160 23.61592 23.69486 23.77659 23.86256
## 4 26.36793 26.43569 26.50769 26.58255 26.66337 26.75078 26.83179 26.92373
## 5 21.14186 21.16022 21.19076 21.22621 21.27082 21.31954 21.37480 21.43664
## 6 24.60945 24.66461 24.72544 24.78714 24.84936 24.91721 24.99158 25.05857
## Y2002 Y2003 Y2004 Y2005 Y2006 Y2007 Y2008
## 1 20.59848 20.58706 20.57759 20.58084 20.58749 20.60246 20.62058
## 2 25.77167 25.87274 25.98136 26.08939 26.20867 26.32753 26.44657
## 3 23.95294 24.05243 24.15957 24.27001 24.38270 24.48846 24.59620
## 4 27.02525 27.12481 27.23107 27.32827 27.43588 27.53363 27.63048
## 5 21.51765 21.59924 21.69218 21.80564 21.93881 22.08962 22.25083
## 6 25.13039 25.20713 25.29898 25.39965 25.51382 25.64247 25.76602
# Reshape bmi from wide to long with gather(): every column except Country
# is collapsed into a `year` key column and a `bmi_val` value column
bmi_long <- gather(
  data = bmi,
  key = year,
  value = bmi_val,
  -Country
)
# Show the first 20 rows of the long-format result
head(x = bmi_long, n = 20)
## Country year bmi_val
## 1 Afghanistan Y1980 21.48678
## 2 Albania Y1980 25.22533
## 3 Algeria Y1980 22.25703
## 4 Andorra Y1980 25.66652
## 5 Angola Y1980 20.94876
## 6 Antigua and Barbuda Y1980 23.31424
## 7 Argentina Y1980 25.37913
## 8 Armenia Y1980 23.82469
## 9 Australia Y1980 24.92729
## 10 Austria Y1980 24.84097
## 11 Azerbaijan Y1980 24.49375
## 12 Bahamas Y1980 24.21064
## 13 Bahrain Y1980 23.97588
## 14 Bangladesh Y1980 20.51918
## 15 Barbados Y1980 24.36372
## 16 Belarus Y1980 24.90898
## 17 Belgium Y1980 25.09879
## 18 Belize Y1980 24.54345
## 19 Benin Y1980 20.80754
## 20 Bermuda Y1980 25.07881
# Reshape bmi_long back to wide with spread(): each distinct `year` key
# becomes its own column, filled with the matching `bmi_val`
bmi_wide <- spread(
  data = bmi_long,
  key = year,
  value = bmi_val
)
# Show the first six rows of the wide-format result
head(x = bmi_wide, n = 6L)
## Country Y1980 Y1981 Y1982 Y1983 Y1984 Y1985
## 1 Afghanistan 21.48678 21.46552 21.45145 21.43822 21.42734 21.41222
## 2 Albania 25.22533 25.23981 25.25636 25.27176 25.27901 25.28669
## 3 Algeria 22.25703 22.34745 22.43647 22.52105 22.60633 22.69501
## 4 Andorra 25.66652 25.70868 25.74681 25.78250 25.81874 25.85236
## 5 Angola 20.94876 20.94371 20.93754 20.93187 20.93569 20.94857
## 6 Antigua and Barbuda 23.31424 23.39054 23.45883 23.53735 23.63584 23.73109
## Y1986 Y1987 Y1988 Y1989 Y1990 Y1991 Y1992 Y1993
## 1 21.40132 21.37679 21.34018 21.29845 21.24818 21.20269 21.14238 21.06376
## 2 25.29451 25.30217 25.30450 25.31944 25.32357 25.28452 25.23077 25.21192
## 3 22.76979 22.84096 22.90644 22.97931 23.04600 23.11333 23.18776 23.25764
## 4 25.89089 25.93414 25.98477 26.04450 26.10936 26.17912 26.24017 26.30356
## 5 20.96030 20.98025 21.01375 21.05269 21.09007 21.12136 21.14987 21.13938
## 6 23.83449 23.93649 24.05364 24.16347 24.26782 24.36568 24.45644 24.54096
## Y1994 Y1995 Y1996 Y1997 Y1998 Y1999 Y2000 Y2001
## 1 20.97987 20.91132 20.85155 20.81307 20.78591 20.75469 20.69521 20.62643
## 2 25.22115 25.25874 25.31097 25.33988 25.39116 25.46555 25.55835 25.66701
## 3 23.32273 23.39526 23.46811 23.54160 23.61592 23.69486 23.77659 23.86256
## 4 26.36793 26.43569 26.50769 26.58255 26.66337 26.75078 26.83179 26.92373
## 5 21.14186 21.16022 21.19076 21.22621 21.27082 21.31954 21.37480 21.43664
## 6 24.60945 24.66461 24.72544 24.78714 24.84936 24.91721 24.99158 25.05857
## Y2002 Y2003 Y2004 Y2005 Y2006 Y2007 Y2008
## 1 20.59848 20.58706 20.57759 20.58084 20.58749 20.60246 20.62058
## 2 25.77167 25.87274 25.98136 26.08939 26.20867 26.32753 26.44657
## 3 23.95294 24.05243 24.15957 24.27001 24.38270 24.48846 24.59620
## 4 27.02525 27.12481 27.23107 27.32827 27.43588 27.53363 27.63048
## 5 21.51765 21.59924 21.69218 21.80564 21.93881 22.08962 22.25083
## 6 25.13039 25.20713 25.29898 25.39965 25.51382 25.64247 25.76602
## Country_ISO year bmi_val
## 1 Afghanistan/AF Y1980 21.48678
## 2 Albania/AL Y1980 25.22533
## 3 Algeria/DZ Y1980 22.25703
## 4 Andorra/AD Y1980 25.66652
## 5 Angola/AO Y1980 20.94876
## 6 Antigua and Barbuda/AG Y1980 23.31424
# Split the combined Country_ISO column of bmi_cc at "/" into two columns,
# Country and ISO
bmi_cc_clean <- separate(
  data = bmi_cc,
  col = Country_ISO,
  into = c("Country", "ISO"),
  sep = "/"
)
# Show the first six rows of the separated result
head(x = bmi_cc_clean, n = 6L)
## Country ISO year bmi_val
## 1 Afghanistan AF Y1980 21.48678
## 2 Albania AL Y1980 25.22533
## 3 Algeria DZ Y1980 22.25703
## 4 Andorra AD Y1980 25.66652
## 5 Angola AO Y1980 20.94876
## 6 Antigua and Barbuda AG Y1980 23.31424
# Rejoin Country and ISO into a single Country_ISO column, separated by "-"
bmi_cc <- unite(
  bmi_cc_clean,
  col = Country_ISO,
  Country, ISO,
  sep = "-"
)
# Show the first rows of the reunited data
head(bmi_cc)
## Country_ISO year bmi_val
## 1 Afghanistan-AF Y1980 21.48678
## 2 Albania-AL Y1980 25.22533
## 3 Algeria-DZ Y1980 22.25703
## 4 Andorra-AD Y1980 25.66652
## 5 Angola-AO Y1980 20.94876
## 6 Antigua and Barbuda-AG Y1980 23.31424
## YEAR JAN FEB MAR APR MAY JUN JUL AUG SEP OCT
## 1 1992 146913 147270 146831 148082 149015 149821 150809 151064 152595 153577
## 2 1993 157525 156292 154774 158996 160624 160171 162832 162491 163285 164711
## 3 1994 167504 169652 172775 173099 172340 174307 174801 177289 178776 180569
## 4 1995 182423 179472 180996 181702 183543 186088 185470 186814 187338 186546
## 5 1996 189167 192269 193993 194712 196210 196127 196229 196215 198843 200488
## 6 1997 202414 204273 204965 203372 201676 204666 207049 207643 208298 208064
## NOV DEC
## 1 153605 155504
## 2 166593 168101
## 3 180695 181492
## 4 189052 190809
## 5 200200 201191
## 6 208982 209379
## YEAR month amount
## 1 1992 JAN 146913
## 2 1993 JAN 157525
## 3 1994 JAN 167504
## 4 1995 JAN 182423
## 5 1996 JAN 189167
## 6 1997 JAN 202414
# Sort the rows of census2 by YEAR (dplyr's arrange)
census2 <- arrange(.data = census2, YEAR)
# Show the first 20 rows of census2
head(census2, n = 20)
## YEAR month amount
## 1 1992 JAN 146913
## 2 1992 FEB 147270
## 3 1992 MAR 146831
## 4 1992 APR 148082
## 5 1992 MAY 149015
## 6 1992 JUN 149821
## 7 1992 JUL 150809
## 8 1992 AUG 151064
## 9 1992 SEP 152595
## 10 1992 OCT 153577
## 11 1992 NOV 153605
## 12 1992 DEC 155504
## 13 1993 JAN 157525
## 14 1993 FEB 156292
## 15 1993 MAR 154774
## 16 1993 APR 158996
## 17 1993 MAY 160624
## 18 1993 JUN 160171
## 19 1993 JUL 162832
## 20 1993 AUG 162491
# Load census_long from its RDS file
census_long <- readRDS(file = "data/census_long.rds")
# Show the first 50 rows of census_long
head(census_long, n = 50)
## YEAR month type amount
## 1 1992 JAN MED 146913.0
## 2 1992 FEB MED 147270.0
## 3 1992 MAR MED 146831.0
## 4 1992 APR MED 148082.0
## 5 1992 MAY MED 149015.0
## 6 1992 JUN MED 149821.0
## 7 1992 JUL MED 150809.0
## 8 1992 AUG MED 151064.0
## 9 1992 SEP MED 152595.0
## 10 1992 OCT MED 153577.0
## 11 1992 NOV MED 153605.0
## 12 1992 DEC MED 155504.0
## 13 1992 JAN LOW 138283.1
## 14 1992 FEB LOW 139097.8
## 15 1992 MAR LOW 139707.8
## 16 1992 APR LOW 146174.1
## 17 1992 MAY LOW 144659.5
## 18 1992 JUN LOW 140761.8
## 19 1992 JUL LOW 145347.8
## 20 1992 AUG LOW 149368.9
## 21 1992 SEP LOW 145721.9
## 22 1992 OCT LOW 151195.6
## 23 1992 NOV LOW 150228.7
## 24 1992 DEC LOW 146701.6
## 25 1992 JAN HIGH 148180.5
## 26 1992 FEB HIGH 150315.3
## 27 1992 MAR HIGH 149089.4
## 28 1992 APR HIGH 157623.9
## 29 1992 MAY HIGH 155522.7
## 30 1992 JUN HIGH 158115.4
## 31 1992 JUL HIGH 160292.5
## 32 1992 AUG HIGH 152280.8
## 33 1992 SEP HIGH 154387.8
## 34 1992 OCT HIGH 157158.0
## 35 1992 NOV HIGH 158224.6
## 36 1992 DEC HIGH 162142.4
## 37 1993 JAN MED 157525.0
## 38 1993 FEB MED 156292.0
## 39 1993 MAR MED 154774.0
## 40 1993 APR MED 158996.0
## 41 1993 MAY MED 160624.0
## 42 1993 JUN MED 160171.0
## 43 1993 JUL MED 162832.0
## 44 1993 AUG MED 162491.0
## 45 1993 SEP MED 163285.0
## 46 1993 OCT MED 164711.0
## 47 1993 NOV MED 166593.0
## 48 1993 DEC MED 168101.0
## 49 1993 JAN LOW 148895.1
## 50 1993 FEB LOW 148119.8
# Spread the values of type into their own columns, filled from amount
# NOTE(review): tidyr documents spread() as superseded by pivot_wider();
# the replacement may order rows/columns differently from the output
# recorded below -- verify before switching.
census_long2 <- spread(census_long, key = type, value = amount)
# Show the first 20 rows of census_long2
head(census_long2, n = 20)
## YEAR month HIGH LOW MED
## 1 1992 APR 157623.9 146174.1 148082
## 2 1992 AUG 152280.8 149368.9 151064
## 3 1992 DEC 162142.4 146701.6 155504
## 4 1992 FEB 150315.3 139097.8 147270
## 5 1992 JAN 148180.5 138283.1 146913
## 6 1992 JUL 160292.5 145347.8 150809
## 7 1992 JUN 158115.4 140761.8 149821
## 8 1992 MAR 149089.4 139707.8 146831
## 9 1992 MAY 155522.7 144659.5 149015
## 10 1992 NOV 158224.6 150228.7 153605
## 11 1992 OCT 157158.0 151195.6 153577
## 12 1992 SEP 154387.8 145721.9 152595
## 13 1993 APR 168537.9 157088.1 158996
## 14 1993 AUG 163707.8 160795.9 162491
## 15 1993 DEC 174739.4 159298.6 168101
## 16 1993 FEB 159337.3 148119.8 156292
## 17 1993 JAN 158792.5 148895.1 157525
## 18 1993 JUL 172315.5 157370.8 162832
## 19 1993 JUN 168465.4 151111.8 160171
## 20 1993 MAR 157032.4 147650.8 154774
## yr_month HIGH LOW MED
## 1 1992_APR 157623.9 146174.1 148082
## 2 1992_AUG 152280.8 149368.9 151064
## 3 1992_DEC 162142.4 146701.6 155504
## 4 1992_FEB 150315.3 139097.8 147270
## 5 1992_JAN 148180.5 138283.1 146913
## 6 1992_JUL 160292.5 145347.8 150809
# Split yr_month into a year column and a month column
census_long4 <- separate(
  census_long3,
  col = yr_month,
  into = c("year", "month")
)
# Show the first 6 rows of the result
head(census_long4)
## year month HIGH LOW MED
## 1 1992 APR 157623.9 146174.1 148082
## 2 1992 AUG 152280.8 149368.9 151064
## 3 1992 DEC 162142.4 146701.6 155504
## 4 1992 FEB 150315.3 139097.8 147270
## 5 1992 JAN 148180.5 138283.1 146913
## 6 1992 JUL 160292.5 145347.8 150809
as.character, as.numeric, as.integer, as.factor, as.logical
lubridate: ymd, mdy, hms, ymd_hms, etc.
## [1] "character"
## [1] "numeric"
## [1] "integer"
## [1] "factor"
## [1] "logical"
## 'data.frame': 395 obs. of 31 variables:
## $ school : Factor w/ 2 levels "GP","MS": 1 1 1 1 1 1 1 1 1 1 ...
## $ sex : Factor w/ 2 levels "F","M": 1 1 1 1 1 2 2 1 2 2 ...
## $ age : int 18 17 15 15 16 16 16 17 15 15 ...
## $ address : Factor w/ 2 levels "R","U": 2 2 2 2 2 2 2 2 2 2 ...
## $ famsize : Factor w/ 2 levels "GT3","LE3": 1 1 2 1 1 2 2 1 2 1 ...
## $ Pstatus : Factor w/ 2 levels "A","T": 1 2 2 2 2 2 2 1 1 2 ...
## $ Medu : int 4 1 1 4 3 4 2 4 3 3 ...
## $ Fedu : int 4 1 1 2 3 3 2 4 2 4 ...
## $ Mjob : Factor w/ 5 levels "at_home","health",..: 1 1 1 2 3 4 3 3 4 3 ...
## $ Fjob : Factor w/ 5 levels "at_home","health",..: 5 3 3 4 3 3 3 5 3 3 ...
## $ reason : Factor w/ 4 levels "course","home",..: 1 1 3 2 2 4 2 2 2 2 ...
## $ guardian : Factor w/ 3 levels "father","mother",..: 2 1 2 2 1 2 2 2 2 2 ...
## $ traveltime: int 2 1 1 1 1 1 1 2 1 1 ...
## $ studytime : int 2 2 2 3 2 2 2 2 2 2 ...
## $ failures : int 0 0 3 0 0 0 0 0 0 0 ...
## $ schoolsup : Factor w/ 2 levels "no","yes": 2 1 2 1 1 1 1 2 1 1 ...
## $ famsup : Factor w/ 2 levels "no","yes": 1 2 1 2 2 2 1 2 2 2 ...
## $ paid : Factor w/ 2 levels "no","yes": 1 1 2 2 2 2 1 1 2 2 ...
## $ activities: Factor w/ 2 levels "no","yes": 1 1 1 2 1 2 1 1 1 2 ...
## $ nursery : Factor w/ 2 levels "no","yes": 2 1 2 2 2 2 2 2 2 2 ...
## $ higher : Factor w/ 2 levels "no","yes": 2 2 2 2 2 2 2 2 2 2 ...
## $ internet : Factor w/ 2 levels "no","yes": 1 2 2 2 1 2 2 1 2 2 ...
## $ romantic : Factor w/ 2 levels "no","yes": 1 1 1 2 1 1 1 1 1 1 ...
## $ famrel : int 4 5 4 3 4 5 4 4 4 5 ...
## $ freetime : int 3 3 3 2 3 4 4 1 2 5 ...
## $ goout : int 4 3 2 2 2 2 4 4 2 1 ...
## $ Dalc : int 1 1 2 1 1 1 1 1 1 1 ...
## $ Walc : int 1 1 3 1 2 2 1 1 1 1 ...
## $ health : int 3 3 3 5 5 5 3 1 1 5 ...
## $ absences : int 6 4 10 2 4 10 0 6 0 0 ...
## $ Grades : Factor w/ 197 levels "10/0/0","10/10/0",..: 124 123 154 86 128 88 46 131 104 77 ...
# Fix up column types in students: Grades becomes character, and the parental
# education codes Medu and Fedu become factors.
# Coerce Grades to character
students$Grades <- as.character(students$Grades)
# Coerce Medu to factor
students$Medu <- as.factor(students$Medu)
# Coerce Fedu to factor. Each column must be coerced from its own values;
# building Fedu from students$Medu would overwrite it with a copy of Medu.
students$Fedu <- as.factor(students$Fedu)
# Look at students once more with str()
# (the Fedu line below is re-derived from the integer Fedu values shown by the
# earlier str() call, so that it matches the corrected coercion)
str(students)
## 'data.frame': 395 obs. of 31 variables:
## $ school : Factor w/ 2 levels "GP","MS": 1 1 1 1 1 1 1 1 1 1 ...
## $ sex : Factor w/ 2 levels "F","M": 1 1 1 1 1 2 2 1 2 2 ...
## $ age : int 18 17 15 15 16 16 16 17 15 15 ...
## $ address : Factor w/ 2 levels "R","U": 2 2 2 2 2 2 2 2 2 2 ...
## $ famsize : Factor w/ 2 levels "GT3","LE3": 1 1 2 1 1 2 2 1 2 1 ...
## $ Pstatus : Factor w/ 2 levels "A","T": 1 2 2 2 2 2 2 1 1 2 ...
## $ Medu : Factor w/ 5 levels "0","1","2","3",..: 5 2 2 5 4 5 3 5 4 4 ...
## $ Fedu : Factor w/ 5 levels "0","1","2","3",..: 5 2 2 3 4 4 3 5 3 5 ...
## $ Mjob : Factor w/ 5 levels "at_home","health",..: 1 1 1 2 3 4 3 3 4 3 ...
## $ Fjob : Factor w/ 5 levels "at_home","health",..: 5 3 3 4 3 3 3 5 3 3 ...
## $ reason : Factor w/ 4 levels "course","home",..: 1 1 3 2 2 4 2 2 2 2 ...
## $ guardian : Factor w/ 3 levels "father","mother",..: 2 1 2 2 1 2 2 2 2 2 ...
## $ traveltime: int 2 1 1 1 1 1 1 2 1 1 ...
## $ studytime : int 2 2 2 3 2 2 2 2 2 2 ...
## $ failures : int 0 0 3 0 0 0 0 0 0 0 ...
## $ schoolsup : Factor w/ 2 levels "no","yes": 2 1 2 1 1 1 1 2 1 1 ...
## $ famsup : Factor w/ 2 levels "no","yes": 1 2 1 2 2 2 1 2 2 2 ...
## $ paid : Factor w/ 2 levels "no","yes": 1 1 2 2 2 2 1 1 2 2 ...
## $ activities: Factor w/ 2 levels "no","yes": 1 1 1 2 1 2 1 1 1 2 ...
## $ nursery : Factor w/ 2 levels "no","yes": 2 1 2 2 2 2 2 2 2 2 ...
## $ higher : Factor w/ 2 levels "no","yes": 2 2 2 2 2 2 2 2 2 2 ...
## $ internet : Factor w/ 2 levels "no","yes": 1 2 2 2 1 2 2 1 2 2 ...
## $ romantic : Factor w/ 2 levels "no","yes": 1 1 1 2 1 1 1 1 1 1 ...
## $ famrel : int 4 5 4 3 4 5 4 4 4 5 ...
## $ freetime : int 3 3 3 2 3 4 4 1 2 5 ...
## $ goout : int 4 3 2 2 2 2 4 4 2 1 ...
## $ Dalc : int 1 1 2 1 1 1 1 1 1 1 ...
## $ Walc : int 1 1 3 1 2 2 1 1 1 1 ...
## $ health : int 3 3 3 5 5 5 3 1 1 5 ...
## $ absences : int 6 4 10 2 4 10 0 6 0 0 ...
## $ Grades : chr "5/6/6" "5/5/6" "7/8/10" "15/14/15" ...
## 'data.frame': 395 obs. of 33 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ school : chr "GP" "GP" "GP" "GP" ...
## $ sex : chr "F" "F" "F" "F" ...
## $ dob : chr "2000-06-05" "1999-11-25" "1998-02-02" "1997-12-20" ...
## $ address : chr "U" "U" "U" "U" ...
## $ famsize : chr "GT3" "GT3" "LE3" "GT3" ...
## $ Pstatus : chr "A" "T" "T" "T" ...
## $ Medu : int 4 1 1 4 3 4 2 4 3 3 ...
## $ Fedu : int 4 1 1 2 3 3 2 4 2 4 ...
## $ Mjob : chr "at_home" "at_home" "at_home" "health" ...
## $ Fjob : chr "teacher" "other" "other" "services" ...
## $ reason : chr "course" "course" "other" "home" ...
## $ guardian : chr "mother" "father" "mother" "mother" ...
## $ traveltime : int 2 1 1 1 1 1 1 2 1 1 ...
## $ studytime : int 2 2 2 3 2 2 2 2 2 2 ...
## $ failures : int 0 0 3 0 0 0 0 0 0 0 ...
## $ schoolsup : chr "yes" "no" "yes" "no" ...
## $ famsup : chr "no" "yes" "no" "yes" ...
## $ paid : chr "no" "no" "yes" "yes" ...
## $ activities : chr "no" "no" "no" "yes" ...
## $ nursery : chr "yes" "no" "yes" "yes" ...
## $ higher : chr "yes" "yes" "yes" "yes" ...
## $ internet : chr "no" "yes" "yes" "yes" ...
## $ romantic : chr "no" "no" "no" "yes" ...
## $ famrel : int 4 5 4 3 4 5 4 4 4 5 ...
## $ freetime : int 3 3 3 2 3 4 4 1 2 5 ...
## $ goout : int 4 3 2 2 2 2 4 4 2 1 ...
## $ Dalc : int 1 1 2 1 1 1 1 1 1 1 ...
## $ Walc : int 1 1 3 1 2 2 1 1 1 1 ...
## $ health : int 3 3 3 5 5 5 3 1 1 5 ...
## $ nurse_visit: chr "2014-04-10 14:59:54" "2015-03-12 14:59:54" "2015-09-21 14:59:54" "2015-09-03 14:59:54" ...
## $ absences : int 6 4 10 2 4 10 0 6 0 0 ...
## $ Grades : chr "5/6/6" "5/5/6" "7/8/10" "15/14/15" ...
## [1] "2015-09-17"
## [1] "2012-07-15 12:56:00 UTC"
# Parse dob (year-month-day text) into a Date with no time component
students2[["dob"]] <- ymd(students2[["dob"]])
# Parse nurse_visit (year-month-day hour:minute:second text) into a date-time
students2[["nurse_visit"]] <- ymd_hms(students2[["nurse_visit"]])
# Inspect the structure of students2 again
str(students2)
## 'data.frame': 395 obs. of 33 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ school : chr "GP" "GP" "GP" "GP" ...
## $ sex : chr "F" "F" "F" "F" ...
## $ dob : Date, format: "2000-06-05" "1999-11-25" ...
## $ address : chr "U" "U" "U" "U" ...
## $ famsize : chr "GT3" "GT3" "LE3" "GT3" ...
## $ Pstatus : chr "A" "T" "T" "T" ...
## $ Medu : int 4 1 1 4 3 4 2 4 3 3 ...
## $ Fedu : int 4 1 1 2 3 3 2 4 2 4 ...
## $ Mjob : chr "at_home" "at_home" "at_home" "health" ...
## $ Fjob : chr "teacher" "other" "other" "services" ...
## $ reason : chr "course" "course" "other" "home" ...
## $ guardian : chr "mother" "father" "mother" "mother" ...
## $ traveltime : int 2 1 1 1 1 1 1 2 1 1 ...
## $ studytime : int 2 2 2 3 2 2 2 2 2 2 ...
## $ failures : int 0 0 3 0 0 0 0 0 0 0 ...
## $ schoolsup : chr "yes" "no" "yes" "no" ...
## $ famsup : chr "no" "yes" "no" "yes" ...
## $ paid : chr "no" "no" "yes" "yes" ...
## $ activities : chr "no" "no" "no" "yes" ...
## $ nursery : chr "yes" "no" "yes" "yes" ...
## $ higher : chr "yes" "yes" "yes" "yes" ...
## $ internet : chr "no" "yes" "yes" "yes" ...
## $ romantic : chr "no" "no" "no" "yes" ...
## $ famrel : int 4 5 4 3 4 5 4 4 4 5 ...
## $ freetime : int 3 3 3 2 3 4 4 1 2 5 ...
## $ goout : int 4 3 2 2 2 2 4 4 2 1 ...
## $ Dalc : int 1 1 2 1 1 1 1 1 1 1 ...
## $ Walc : int 1 1 3 1 2 2 1 1 1 1 ...
## $ health : int 3 3 3 5 5 5 3 1 1 5 ...
## $ nurse_visit: POSIXct, format: "2014-04-10 14:59:54" "2015-03-12 14:59:54" ...
## $ absences : int 6 4 10 2 4 10 0 6 0 0 ...
## $ Grades : chr "5/6/6" "5/5/6" "7/8/10" "15/14/15" ...
stringr package: str_trim, str_pad, str_detect, str_replace
stringr replaces the disparate base R functions for working with strings:
paste, paste0, gsub, grep, etc.
# The stringr package is already loaded
# Strip whitespace from both ends of each name
str_trim(c(" Filip ", "Nick ", " Jonathan"))
## [1] "Filip" "Nick" "Jonathan"
# Left-pad each ID with zeros up to a width of nine characters
str_pad(
  c("23485W", "8823453Q", "994Z"),
  width = 9,
  side = "left",
  pad = "0"
)
## [1] "00023485W" "08823453Q" "00000994Z"
# Lowercase two-letter state abbreviations, ten per row
states <- c(
  "al", "ak", "az", "ar", "ca", "co", "ct", "de", "fl", "ga",
  "hi", "id", "il", "in", "ia", "ks", "ky", "la", "me", "md",
  "ma", "mi", "mn", "ms", "mo", "mt", "ne", "nv", "nh", "nj",
  "nm", "ny", "nc", "nd", "oh", "ok", "or", "pa", "ri", "sc",
  "sd", "tn", "tx", "ut", "vt", "va", "wa", "wv", "wi", "wy"
)
# Store an uppercase copy of states in states_upper, then print it
states_upper <- toupper(states)
states_upper
## [1] "AL" "AK" "AZ" "AR" "CA" "CO" "CT" "DE" "FL" "GA" "HI" "ID" "IL" "IN" "IA"
## [16] "KS" "KY" "LA" "ME" "MD" "MA" "MI" "MN" "MS" "MO" "MT" "NE" "NV" "NH" "NJ"
## [31] "NM" "NY" "NC" "ND" "OH" "OK" "OR" "PA" "RI" "SC" "SD" "TN" "TX" "UT" "VT"
## [46] "VA" "WA" "WV" "WI" "WY"
## [1] "al" "ak" "az" "ar" "ca" "co" "ct" "de" "fl" "ga" "hi" "id" "il" "in" "ia"
## [16] "ks" "ky" "la" "me" "md" "ma" "mi" "mn" "ms" "mo" "mt" "ne" "nv" "nh" "nj"
## [31] "nm" "ny" "nc" "nd" "oh" "ok" "or" "pa" "ri" "sc" "sd" "tn" "tx" "ut" "vt"
## [46] "va" "wa" "wv" "wi" "wy"
## X school sex dob address famsize Pstatus Medu Fedu Mjob Fjob
## 1 1 GP F 2000-06-05 U GT3 A 4 4 at_home teacher
## 2 2 GP F 1999-11-25 U GT3 T 1 1 at_home other
## 3 3 GP F 1998-02-02 U LE3 T 1 1 at_home other
## 4 4 GP F 1997-12-20 U GT3 T 4 2 health services
## 5 5 GP F 1998-10-04 U GT3 T 3 3 other other
## 6 6 GP M 1999-06-16 U LE3 T 4 3 services other
## reason guardian traveltime studytime failures schoolsup famsup paid
## 1 course mother 2 2 0 yes no no
## 2 course father 1 2 0 no yes no
## 3 other mother 1 2 3 yes no yes
## 4 home mother 1 3 0 no yes yes
## 5 home father 1 2 0 no yes yes
## 6 reputation mother 1 2 0 no yes yes
## activities nursery higher internet romantic famrel freetime goout Dalc Walc
## 1 no yes yes no no 4 3 4 1 1
## 2 no no yes yes no 5 3 3 1 1
## 3 no yes yes yes no 4 3 2 2 3
## 4 yes yes yes yes yes 3 2 2 1 1
## 5 no yes yes no no 4 3 2 1 2
## 6 yes yes yes yes no 5 4 2 1 2
## health nurse_visit absences Grades
## 1 3 2014-04-10 14:59:54 6 5/6/6
## 2 3 2015-03-12 14:59:54 4 5/5/6
## 3 3 2015-09-21 14:59:54 10 7/8/10
## 4 5 2015-09-03 14:59:54 2 15/14/15
## 5 5 2015-04-07 14:59:54 4 6/10/10
## 6 5 2013-11-15 14:59:54 10 15/15/15
## [1] FALSE FALSE FALSE TRUE FALSE FALSE TRUE FALSE FALSE TRUE FALSE FALSE
## [13] FALSE TRUE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE TRUE TRUE
## [25] TRUE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE
## [37] TRUE FALSE FALSE FALSE FALSE TRUE TRUE FALSE FALSE FALSE TRUE TRUE
## [49] TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE TRUE TRUE FALSE FALSE
## [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE TRUE FALSE
## [73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE TRUE
## [85] FALSE FALSE FALSE TRUE FALSE FALSE TRUE TRUE FALSE TRUE FALSE TRUE
## [97] TRUE FALSE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [109] FALSE FALSE FALSE FALSE FALSE TRUE FALSE TRUE FALSE FALSE FALSE FALSE
## [121] TRUE TRUE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE TRUE
## [133] FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE TRUE FALSE FALSE TRUE
## [145] FALSE TRUE FALSE FALSE TRUE TRUE FALSE FALSE FALSE TRUE FALSE FALSE
## [157] FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE TRUE TRUE TRUE FALSE
## [169] FALSE FALSE FALSE TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE
## [181] TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE TRUE TRUE TRUE TRUE
## [193] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [205] FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE
## [217] FALSE TRUE FALSE TRUE FALSE FALSE FALSE FALSE FALSE TRUE TRUE FALSE
## [229] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE
## [241] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE
## [253] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [265] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [277] TRUE FALSE FALSE FALSE FALSE TRUE FALSE TRUE FALSE FALSE FALSE FALSE
## [289] FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE TRUE FALSE FALSE
## [301] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [313] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [325] FALSE FALSE FALSE TRUE FALSE TRUE FALSE FALSE TRUE FALSE FALSE FALSE
## [337] FALSE FALSE FALSE TRUE FALSE FALSE TRUE FALSE FALSE TRUE FALSE TRUE
## [349] TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE TRUE
## [361] FALSE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [373] FALSE TRUE TRUE TRUE FALSE FALSE FALSE FALSE TRUE TRUE TRUE TRUE
## [385] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# Expand the sex codes: first "F" becomes "Female"...
students2$sex <- str_replace(
  string = students2$sex,
  pattern = "F",
  replacement = "Female"
)
# ...then "M" becomes "Male"
students2$sex <- str_replace(
  string = students2$sex,
  pattern = "M",
  replacement = "Male"
)
# Show the first rows of students2
head(students2)
## X school sex dob address famsize Pstatus Medu Fedu Mjob
## 1 1 GP Female 2000-06-05 U GT3 A 4 4 at_home
## 2 2 GP Female 1999-11-25 U GT3 T 1 1 at_home
## 3 3 GP Female 1998-02-02 U LE3 T 1 1 at_home
## 4 4 GP Female 1997-12-20 U GT3 T 4 2 health
## 5 5 GP Female 1998-10-04 U GT3 T 3 3 other
## 6 6 GP Male 1999-06-16 U LE3 T 4 3 services
## Fjob reason guardian traveltime studytime failures schoolsup famsup
## 1 teacher course mother 2 2 0 yes no
## 2 other course father 1 2 0 no yes
## 3 other other mother 1 2 3 yes no
## 4 services home mother 1 3 0 no yes
## 5 other home father 1 2 0 no yes
## 6 other reputation mother 1 2 0 no yes
## paid activities nursery higher internet romantic famrel freetime goout Dalc
## 1 no no yes yes no no 4 3 4 1
## 2 no no no yes yes no 5 3 3 1
## 3 yes no yes yes yes no 4 3 2 2
## 4 yes yes yes yes yes yes 3 2 2 1
## 5 yes no yes yes no no 4 3 2 1
## 6 yes yes yes yes yes no 5 4 2 1
## Walc health nurse_visit absences Grades
## 1 1 3 2014-04-10 14:59:54 6 5/6/6
## 2 1 3 2015-03-12 14:59:54 4 5/5/6
## 3 3 3 2015-09-21 14:59:54 10 7/8/10
## 4 1 5 2015-09-03 14:59:54 2 15/14/15
## 5 2 5 2015-04-07 14:59:54 4 6/10/10
## 6 2 5 2013-11-15 14:59:54 10 15/15/15
# Load social_df from its RDS file
social_df <- readRDS(file = "data/social_df.rds")
# Flag every NA in social_df, cell by cell
is.na(social_df)
## name n_friends status
## [1,] FALSE FALSE FALSE
## [2,] FALSE TRUE FALSE
## [3,] FALSE FALSE FALSE
## [4,] FALSE FALSE FALSE
## [1] TRUE
## name n_friends status
## Alice:1 Min. : 43.0 :2
## David:1 1st Qu.: 94.0 Going out! :1
## Sarah:1 Median :145.0 Movie night...:1
## Tom :1 Mean :144.0
## 3rd Qu.:194.5
## Max. :244.0
## NA's :1
##
## Going out! Movie night...
## 2 1 1
# Treat empty status strings as missing by overwriting them with NA
social_df[["status"]][social_df[["status"]] == ""] <- NA
# Display the updated social_df
social_df
## name n_friends status
## 1 Sarah 244 Going out!
## 2 Tom NA <NA>
## 3 David 145 Movie night...
## 4 Alice 43 <NA>
## [1] TRUE FALSE TRUE FALSE
## name n_friends status
## 1 Sarah 244 Going out!
## 3 David 145 Movie night...
summary and visualizations like hist or boxplot are good for finding these
## school sex age address famsize Pstatus Medu
## GP:349 F:208 Min. :15.00 R: 88 GT3:281 A: 41 Min. :0.000
## MS: 46 M:187 1st Qu.:16.00 U:307 LE3:114 T:354 1st Qu.:2.000
## Median :17.00 Median :3.000
## Mean :16.75 Mean :2.749
## 3rd Qu.:18.00 3rd Qu.:4.000
## Max. :38.00 Max. :4.000
##
## Fedu Mjob Fjob reason guardian
## Min. :0.000 at_home : 59 at_home : 20 course :145 father: 90
## 1st Qu.:2.000 health : 34 health : 18 home :109 mother:273
## Median :2.000 other :141 other :217 other : 36 other : 32
## Mean :2.522 services:103 services:111 reputation:105
## 3rd Qu.:3.000 teacher : 58 teacher : 29
## Max. :4.000
##
## traveltime studytime failures schoolsup famsup paid
## Min. :1.000 Min. :1.000 Min. :0.0000 no :344 no :153 no :214
## 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:0.0000 yes: 51 yes:242 yes:181
## Median :1.000 Median :2.000 Median :0.0000
## Mean :1.448 Mean :2.035 Mean :0.3342
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:0.0000
## Max. :4.000 Max. :4.000 Max. :3.0000
##
## activities nursery higher internet romantic famrel
## no :194 no : 81 no : 20 no : 66 no :263 Min. :1.000
## yes:201 yes:314 yes:375 yes:329 yes:132 1st Qu.:4.000
## Median :4.000
## Mean :3.944
## 3rd Qu.:5.000
## Max. :5.000
##
## freetime goout Dalc Walc
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:1.000
## Median :3.000 Median :3.000 Median :1.000 Median :2.000
## Mean :3.235 Mean :3.109 Mean :1.481 Mean :2.291
## 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:2.000 3rd Qu.:3.000
## Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
##
## health absences Grades
## Min. :1.000 Min. :-1.000 10/10/10: 9
## 1st Qu.:3.000 1st Qu.: 0.000 10/9/9 : 7
## Median :4.000 Median : 4.000 11/11/11: 7
## Mean :3.554 Mean : 5.691 16/15/15: 7
## 3rd Qu.:5.000 3rd Qu.: 8.000 8/9/10 : 7
## Max. :5.000 Max. :75.000 11/11/10: 6
## (Other) :352
# View a histogram of absences, but force zeros to be bucketed to the right
# of zero: right = FALSE makes the histogram cells left-closed, [a, b).
# FALSE is spelled out because F is an ordinary variable that can be reassigned.
hist(students3$absences, right = FALSE)
## [1] "data.frame"
## [1] 286 35
## [1] "X" "year" "month" "measure" "X1" "X2" "X3"
## [8] "X4" "X5" "X6" "X7" "X8" "X9" "X10"
## [15] "X11" "X12" "X13" "X14" "X15" "X16" "X17"
## [22] "X18" "X19" "X20" "X21" "X22" "X23" "X24"
## [29] "X25" "X26" "X27" "X28" "X29" "X30" "X31"
## 'data.frame': 286 obs. of 35 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ year : int 2014 2014 2014 2014 2014 2014 2014 2014 2014 2014 ...
## $ month : int 12 12 12 12 12 12 12 12 12 12 ...
## $ measure: chr "Max.TemperatureF" "Mean.TemperatureF" "Min.TemperatureF" "Max.Dew.PointF" ...
## $ X1 : chr "64" "52" "39" "46" ...
## $ X2 : chr "42" "38" "33" "40" ...
## $ X3 : chr "51" "44" "37" "49" ...
## $ X4 : chr "43" "37" "30" "24" ...
## $ X5 : chr "42" "34" "26" "37" ...
## $ X6 : chr "45" "42" "38" "45" ...
## $ X7 : chr "38" "30" "21" "36" ...
## $ X8 : chr "29" "24" "18" "28" ...
## $ X9 : chr "49" "39" "29" "49" ...
## $ X10 : chr "48" "43" "38" "45" ...
## $ X11 : chr "39" "36" "32" "37" ...
## $ X12 : chr "39" "35" "31" "28" ...
## $ X13 : chr "42" "37" "32" "28" ...
## $ X14 : chr "45" "39" "33" "29" ...
## $ X15 : chr "42" "37" "32" "33" ...
## $ X16 : chr "44" "40" "35" "42" ...
## $ X17 : chr "49" "45" "41" "46" ...
## $ X18 : chr "44" "40" "36" "34" ...
## $ X19 : chr "37" "33" "29" "25" ...
## $ X20 : chr "36" "32" "27" "30" ...
## $ X21 : chr "36" "33" "30" "30" ...
## $ X22 : chr "44" "39" "33" "39" ...
## $ X23 : chr "47" "45" "42" "45" ...
## $ X24 : chr "46" "44" "41" "46" ...
## $ X25 : chr "59" "52" "44" "58" ...
## $ X26 : chr "50" "44" "37" "31" ...
## $ X27 : chr "52" "45" "38" "34" ...
## $ X28 : chr "52" "46" "40" "42" ...
## $ X29 : chr "41" "36" "30" "26" ...
## $ X30 : chr "30" "26" "22" "10" ...
## $ X31 : chr "30" "25" "20" "8" ...
## Observations: 286
## Variables: 35
## $ X <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, ...
## $ year <int> 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014,...
## $ month <int> 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,...
## $ measure <chr> "Max.TemperatureF", "Mean.TemperatureF", "Min.TemperatureF"...
## $ X1 <chr> "64", "52", "39", "46", "40", "26", "74", "63", "52", "30.4...
## $ X2 <chr> "42", "38", "33", "40", "27", "17", "92", "72", "51", "30.7...
## $ X3 <chr> "51", "44", "37", "49", "42", "24", "100", "79", "57", "30....
## $ X4 <chr> "43", "37", "30", "24", "21", "13", "69", "54", "39", "30.5...
## $ X5 <chr> "42", "34", "26", "37", "25", "12", "85", "66", "47", "30.6...
## $ X6 <chr> "45", "42", "38", "45", "40", "36", "100", "93", "85", "30....
## $ X7 <chr> "38", "30", "21", "36", "20", "-3", "92", "61", "29", "30.6...
## $ X8 <chr> "29", "24", "18", "28", "16", "3", "92", "70", "47", "30.77...
## $ X9 <chr> "49", "39", "29", "49", "41", "28", "100", "93", "86", "30....
## $ X10 <chr> "48", "43", "38", "45", "39", "37", "100", "95", "89", "29....
## $ X11 <chr> "39", "36", "32", "37", "31", "27", "92", "87", "82", "29.8...
## $ X12 <chr> "39", "35", "31", "28", "27", "25", "85", "75", "64", "29.8...
## $ X13 <chr> "42", "37", "32", "28", "26", "24", "75", "65", "55", "29.8...
## $ X14 <chr> "45", "39", "33", "29", "27", "25", "82", "68", "53", "29.9...
## $ X15 <chr> "42", "37", "32", "33", "29", "27", "89", "75", "60", "30.1...
## $ X16 <chr> "44", "40", "35", "42", "36", "30", "96", "85", "73", "30.1...
## $ X17 <chr> "49", "45", "41", "46", "41", "32", "100", "85", "70", "29....
## $ X18 <chr> "44", "40", "36", "34", "30", "26", "89", "73", "57", "29.8...
## $ X19 <chr> "37", "33", "29", "25", "22", "20", "69", "63", "56", "30.1...
## $ X20 <chr> "36", "32", "27", "30", "24", "20", "89", "79", "69", "30.3...
## $ X21 <chr> "36", "33", "30", "30", "27", "25", "85", "77", "69", "30.3...
## $ X22 <chr> "44", "39", "33", "39", "34", "25", "89", "79", "69", "30.4...
## $ X23 <chr> "47", "45", "42", "45", "42", "37", "100", "91", "82", "30....
## $ X24 <chr> "46", "44", "41", "46", "44", "41", "100", "98", "96", "30....
## $ X25 <chr> "59", "52", "44", "58", "43", "29", "100", "75", "49", "29....
## $ X26 <chr> "50", "44", "37", "31", "29", "28", "70", "60", "49", "30.1...
## $ X27 <chr> "52", "45", "38", "34", "31", "29", "70", "60", "50", "30.2...
## $ X28 <chr> "52", "46", "40", "42", "35", "27", "76", "65", "53", "29.9...
## $ X29 <chr> "41", "36", "30", "26", "20", "10", "64", "51", "37", "30.2...
## $ X30 <chr> "30", "26", "22", "10", "4", "-6", "50", "38", "26", "30.36...
## $ X31 <chr> "30", "25", "20", "8", "5", "1", "57", "44", "31", "30.32",...
## X year month measure
## Min. : 1.00 Min. :2014 Min. : 1.000 Length:286
## 1st Qu.: 72.25 1st Qu.:2015 1st Qu.: 4.000 Class :character
## Median :143.50 Median :2015 Median : 7.000 Mode :character
## Mean :143.50 Mean :2015 Mean : 6.923
## 3rd Qu.:214.75 3rd Qu.:2015 3rd Qu.:10.000
## Max. :286.00 Max. :2015 Max. :12.000
## X1 X2 X3 X4
## Length:286 Length:286 Length:286 Length:286
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## X5 X6 X7 X8
## Length:286 Length:286 Length:286 Length:286
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## X9 X10 X11 X12
## Length:286 Length:286 Length:286 Length:286
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## X13 X14 X15 X16
## Length:286 Length:286 Length:286 Length:286
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## X17 X18 X19 X20
## Length:286 Length:286 Length:286 Length:286
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## X21 X22 X23 X24
## Length:286 Length:286 Length:286 Length:286
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## X25 X26 X27 X28
## Length:286 Length:286 Length:286 Length:286
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## X29 X30 X31
## Length:286 Length:286 Length:286
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
## X year month measure X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
## 1 1 2014 12 Max.TemperatureF 64 42 51 43 42 45 38 29 49 48 39 39 42 45
## 2 2 2014 12 Mean.TemperatureF 52 38 44 37 34 42 30 24 39 43 36 35 37 39
## 3 3 2014 12 Min.TemperatureF 39 33 37 30 26 38 21 18 29 38 32 31 32 33
## 4 4 2014 12 Max.Dew.PointF 46 40 49 24 37 45 36 28 49 45 37 28 28 29
## 5 5 2014 12 MeanDew.PointF 40 27 42 21 25 40 20 16 41 39 31 27 26 27
## 6 6 2014 12 Min.DewpointF 26 17 24 13 12 36 -3 3 28 37 27 25 24 25
## X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
## 1 42 44 49 44 37 36 36 44 47 46 59 50 52 52 41 30 30
## 2 37 40 45 40 33 32 33 39 45 44 52 44 45 46 36 26 25
## 3 32 35 41 36 29 27 30 33 42 41 44 37 38 40 30 22 20
## 4 33 42 46 34 25 30 30 39 45 46 58 31 34 42 26 10 8
## 5 29 36 41 30 22 24 27 34 42 44 43 29 31 35 20 4 5
## 6 27 30 32 26 20 20 25 25 37 41 29 28 29 27 10 -6 1
## X year month measure X1 X2 X3 X4 X5 X6
## 1 1 2014 12 Max.TemperatureF 64 42 51 43 42 45
## 2 2 2014 12 Mean.TemperatureF 52 38 44 37 34 42
## 3 3 2014 12 Min.TemperatureF 39 33 37 30 26 38
## 4 4 2014 12 Max.Dew.PointF 46 40 49 24 37 45
## 5 5 2014 12 MeanDew.PointF 40 27 42 21 25 40
## 6 6 2014 12 Min.DewpointF 26 17 24 13 12 36
## 7 7 2014 12 Max.Humidity 74 92 100 69 85 100
## 8 8 2014 12 Mean.Humidity 63 72 79 54 66 93
## 9 9 2014 12 Min.Humidity 52 51 57 39 47 85
## 10 10 2014 12 Max.Sea.Level.PressureIn 30.45 30.71 30.4 30.56 30.68 30.42
## 11 11 2014 12 Mean.Sea.Level.PressureIn 30.13 30.59 30.07 30.33 30.59 30.24
## 12 12 2014 12 Min.Sea.Level.PressureIn 30.01 30.4 29.87 30.09 30.45 30.16
## 13 13 2014 12 Max.VisibilityMiles 10 10 10 10 10 10
## 14 14 2014 12 Mean.VisibilityMiles 10 8 5 10 10 4
## 15 15 2014 12 Min.VisibilityMiles 10 2 1 10 5 0
## X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18
## 1 38 29 49 48 39 39 42 45 42 44 49 44
## 2 30 24 39 43 36 35 37 39 37 40 45 40
## 3 21 18 29 38 32 31 32 33 32 35 41 36
## 4 36 28 49 45 37 28 28 29 33 42 46 34
## 5 20 16 41 39 31 27 26 27 29 36 41 30
## 6 -3 3 28 37 27 25 24 25 27 30 32 26
## 7 92 92 100 100 92 85 75 82 89 96 100 89
## 8 61 70 93 95 87 75 65 68 75 85 85 73
## 9 29 47 86 89 82 64 55 53 60 73 70 57
## 10 30.69 30.77 30.51 29.58 29.81 29.88 29.86 29.91 30.15 30.17 29.91 29.87
## 11 30.46 30.67 30.04 29.5 29.61 29.85 29.82 29.83 30.05 30.09 29.75 29.78
## 12 30.24 30.51 29.49 29.43 29.44 29.81 29.78 29.78 29.91 29.92 29.69 29.71
## 13 10 10 10 10 10 10 10 10 10 10 10 10
## 14 10 8 2 3 7 10 10 10 10 9 6 10
## 15 5 2 1 1 1 7 10 10 10 5 1 10
## X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30
## 1 37 36 36 44 47 46 59 50 52 52 41 30
## 2 33 32 33 39 45 44 52 44 45 46 36 26
## 3 29 27 30 33 42 41 44 37 38 40 30 22
## 4 25 30 30 39 45 46 58 31 34 42 26 10
## 5 22 24 27 34 42 44 43 29 31 35 20 4
## 6 20 20 25 25 37 41 29 28 29 27 10 -6
## 7 69 89 85 89 100 100 100 70 70 76 64 50
## 8 63 79 77 79 91 98 75 60 60 65 51 38
## 9 56 69 69 69 82 96 49 49 50 53 37 26
## 10 30.15 30.31 30.37 30.4 30.31 30.13 29.96 30.16 30.22 29.99 30.22 30.36
## 11 29.98 30.26 30.32 30.35 30.23 29.9 29.63 30.11 30.14 29.87 30.12 30.32
## 12 29.86 30.17 30.28 30.3 30.16 29.55 29.47 29.99 30.03 29.77 30 30.23
## 13 10 10 10 10 10 2 10 10 10 10 10 10
## 14 10 10 9 10 5 1 8 10 10 10 10 10
## 15 10 7 6 4 1 0 1 10 10 10 10 10
## X31
## 1 30
## 2 25
## 3 20
## 4 8
## 5 5
## 6 1
## 7 57
## 8 44
## 9 31
## 10 30.32
## 11 30.25
## 12 30.13
## 13 10
## 14 10
## 15 10
## X year month measure X1 X2 X3 X4 X5 X6 X7 X8
## 281 281 2015 12 Mean.Wind.SpeedMPH 6 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 282 282 2015 12 Max.Gust.SpeedMPH 17 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 283 283 2015 12 PrecipitationIn 0.14 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 284 284 2015 12 CloudCover 7 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 285 285 2015 12 Events Rain <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 286 286 2015 12 WindDirDegrees 109 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23
## 281 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 282 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 283 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 284 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 285 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 286 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## X24 X25 X26 X27 X28 X29 X30 X31
## 281 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 282 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 283 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 284 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 285 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 286 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## X year month measure X1 X2 X3 X4 X5 X6 X7 X8
## 277 277 2015 12 Max.VisibilityMiles 10 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 278 278 2015 12 Mean.VisibilityMiles 8 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 279 279 2015 12 Min.VisibilityMiles 1 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 280 280 2015 12 Max.Wind.SpeedMPH 15 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 281 281 2015 12 Mean.Wind.SpeedMPH 6 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 282 282 2015 12 Max.Gust.SpeedMPH 17 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 283 283 2015 12 PrecipitationIn 0.14 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 284 284 2015 12 CloudCover 7 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 285 285 2015 12 Events Rain <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 286 286 2015 12 WindDirDegrees 109 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23
## 277 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 278 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 279 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 280 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 281 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 282 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 283 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 284 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 285 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 286 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## X24 X25 X26 X27 X28 X29 X30 X31
## 277 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 278 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 279 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 280 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 281 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 282 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 283 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 284 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 285 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 286 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## The tidyr package is already loaded
# Reshape from wide to long: fold the X1..X31 day columns into a pair of
# key/value columns (day, value), dropping cells that are NA
weather2 <- gather(data = weather, key = day, value = value, X1:X31,
                   na.rm = TRUE)
# Inspect the first rows of the long table
head(weather2)
## X year month measure day value
## 1 1 2014 12 Max.TemperatureF X1 64
## 2 2 2014 12 Mean.TemperatureF X1 52
## 3 3 2014 12 Min.TemperatureF X1 39
## 4 4 2014 12 Max.Dew.PointF X1 46
## 5 5 2014 12 MeanDew.PointF X1 40
## 6 6 2014 12 Min.DewpointF X1 26
# Drop the leading X column, which only repeats the row names
weather2 <- weather2[-1]
# Widen again: one column per distinct measure, filled from value
weather3 <- spread(data = weather2, key = measure, value = value)
# Inspect the first rows of the widened table
head(weather3)
## year month day CloudCover Events Max.Dew.PointF Max.Gust.SpeedMPH
## 1 2014 12 X1 6 Rain 46 29
## 2 2014 12 X10 8 Rain 45 29
## 3 2014 12 X11 8 Rain-Snow 37 28
## 4 2014 12 X12 7 Snow 28 21
## 5 2014 12 X13 5 28 23
## 6 2014 12 X14 4 29 20
## Max.Humidity Max.Sea.Level.PressureIn Max.TemperatureF Max.VisibilityMiles
## 1 74 30.45 64 10
## 2 100 29.58 48 10
## 3 92 29.81 39 10
## 4 85 29.88 39 10
## 5 75 29.86 42 10
## 6 82 29.91 45 10
## Max.Wind.SpeedMPH Mean.Humidity Mean.Sea.Level.PressureIn Mean.TemperatureF
## 1 22 63 30.13 52
## 2 23 95 29.5 43
## 3 21 87 29.61 36
## 4 16 75 29.85 35
## 5 17 65 29.82 37
## 6 15 68 29.83 39
## Mean.VisibilityMiles Mean.Wind.SpeedMPH MeanDew.PointF Min.DewpointF
## 1 10 13 40 26
## 2 3 13 39 37
## 3 7 13 31 27
## 4 10 11 27 25
## 5 10 12 26 24
## 6 10 10 27 25
## Min.Humidity Min.Sea.Level.PressureIn Min.TemperatureF Min.VisibilityMiles
## 1 52 30.01 39 10
## 2 89 29.43 38 1
## 3 82 29.44 32 1
## 4 64 29.81 31 7
## 5 55 29.78 32 10
## 6 53 29.78 33 10
## PrecipitationIn WindDirDegrees
## 1 0.01 268
## 2 0.28 357
## 3 0.02 230
## 4 T 286
## 5 T 298
## 6 0.00 306
## year month day CloudCover Events Max.Dew.PointF Max.Gust.SpeedMPH
## 1 2014 12 X1 6 Rain 46 29
## 2 2014 12 X10 8 Rain 45 29
## 3 2014 12 X11 8 Rain-Snow 37 28
## 4 2014 12 X12 7 Snow 28 21
## 5 2014 12 X13 5 28 23
## 6 2014 12 X14 4 29 20
## Max.Humidity Max.Sea.Level.PressureIn Max.TemperatureF Max.VisibilityMiles
## 1 74 30.45 64 10
## 2 100 29.58 48 10
## 3 92 29.81 39 10
## 4 85 29.88 39 10
## 5 75 29.86 42 10
## 6 82 29.91 45 10
## Max.Wind.SpeedMPH Mean.Humidity Mean.Sea.Level.PressureIn Mean.TemperatureF
## 1 22 63 30.13 52
## 2 23 95 29.5 43
## 3 21 87 29.61 36
## 4 16 75 29.85 35
## 5 17 65 29.82 37
## 6 15 68 29.83 39
## Mean.VisibilityMiles Mean.Wind.SpeedMPH MeanDew.PointF Min.DewpointF
## 1 10 13 40 26
## 2 3 13 39 37
## 3 7 13 31 27
## 4 10 11 27 25
## 5 10 12 26 24
## 6 10 10 27 25
## Min.Humidity Min.Sea.Level.PressureIn Min.TemperatureF Min.VisibilityMiles
## 1 52 30.01 39 10
## 2 89 29.43 38 1
## 3 82 29.44 32 1
## 4 64 29.81 31 7
## 5 55 29.78 32 10
## 6 53 29.78 33 10
## PrecipitationIn WindDirDegrees
## 1 0.01 268
## 2 0.28 357
## 3 0.02 230
## 4 T 286
## 5 T 298
## 6 0.00 306
## year month day CloudCover Events
## 1 2014 12 1 6 Rain
## 2 2014 12 10 8 Rain
## 3 2014 12 11 8 Rain-Snow
## 4 2014 12 12 7 Snow
## 5 2014 12 13 5
## 6 2014 12 14 4
# Merge year, month and day into a single dash-separated date column
weather4 <- unite(data = weather3, col = date, year, month, day, sep = "-")
# Show the first five columns of the first rows
head(weather4[1:5])
## date CloudCover Events Max.Dew.PointF Max.Gust.SpeedMPH
## 1 2014-12-1 6 Rain 46 29
## 2 2014-12-10 8 Rain 45 29
## 3 2014-12-11 8 Rain-Snow 37 28
## 4 2014-12-12 7 Snow 28 21
## 5 2014-12-13 5 28 23
## 6 2014-12-14 4 29 20
# Parse the date strings into Date values with lubridate's ymd()
weather4[["date"]] <- ymd(weather4[["date"]])
# Check the structure of the first five columns
str(weather4[1:5])
## 'data.frame': 366 obs. of 5 variables:
## $ date : Date, format: "2014-12-01" "2014-12-10" ...
## $ CloudCover : chr "6" "8" "8" "7" ...
## $ Events : chr "Rain" "Rain" "Rain-Snow" "Snow" ...
## $ Max.Dew.PointF : chr "46" "45" "37" "28" ...
## $ Max.Gust.SpeedMPH: chr "29" "29" "28" "21" ...
# Reorder with dplyr's select(): date and Events first, then the remaining
# columns in their existing order
weather5 <- weather4 %>%
  select(date, Events, CloudCover:WindDirDegrees)
# Inspect the first rows of weather5
head(weather5)
## date Events CloudCover Max.Dew.PointF Max.Gust.SpeedMPH Max.Humidity
## 1 2014-12-01 Rain 6 46 29 74
## 2 2014-12-10 Rain 8 45 29 100
## 3 2014-12-11 Rain-Snow 8 37 28 92
## 4 2014-12-12 Snow 7 28 21 85
## 5 2014-12-13 5 28 23 75
## 6 2014-12-14 4 29 20 82
## Max.Sea.Level.PressureIn Max.TemperatureF Max.VisibilityMiles
## 1 30.45 64 10
## 2 29.58 48 10
## 3 29.81 39 10
## 4 29.88 39 10
## 5 29.86 42 10
## 6 29.91 45 10
## Max.Wind.SpeedMPH Mean.Humidity Mean.Sea.Level.PressureIn Mean.TemperatureF
## 1 22 63 30.13 52
## 2 23 95 29.5 43
## 3 21 87 29.61 36
## 4 16 75 29.85 35
## 5 17 65 29.82 37
## 6 15 68 29.83 39
## Mean.VisibilityMiles Mean.Wind.SpeedMPH MeanDew.PointF Min.DewpointF
## 1 10 13 40 26
## 2 3 13 39 37
## 3 7 13 31 27
## 4 10 11 27 25
## 5 10 12 26 24
## 6 10 10 27 25
## Min.Humidity Min.Sea.Level.PressureIn Min.TemperatureF Min.VisibilityMiles
## 1 52 30.01 39 10
## 2 89 29.43 38 1
## 3 82 29.44 32 1
## 4 64 29.81 31 7
## 5 55 29.78 32 10
## 6 53 29.78 33 10
## PrecipitationIn WindDirDegrees
## 1 0.01 268
## 2 0.28 357
## 3 0.02 230
## 4 T 286
## 5 T 298
## 6 0.00 306
## 'data.frame': 366 obs. of 23 variables:
## $ date : Date, format: "2014-12-01" "2014-12-10" ...
## $ Events : chr "Rain" "Rain" "Rain-Snow" "Snow" ...
## $ CloudCover : chr "6" "8" "8" "7" ...
## $ Max.Dew.PointF : chr "46" "45" "37" "28" ...
## $ Max.Gust.SpeedMPH : chr "29" "29" "28" "21" ...
## $ Max.Humidity : chr "74" "100" "92" "85" ...
## $ Max.Sea.Level.PressureIn : chr "30.45" "29.58" "29.81" "29.88" ...
## $ Max.TemperatureF : chr "64" "48" "39" "39" ...
## $ Max.VisibilityMiles : chr "10" "10" "10" "10" ...
## $ Max.Wind.SpeedMPH : chr "22" "23" "21" "16" ...
## $ Mean.Humidity : chr "63" "95" "87" "75" ...
## $ Mean.Sea.Level.PressureIn: chr "30.13" "29.5" "29.61" "29.85" ...
## $ Mean.TemperatureF : chr "52" "43" "36" "35" ...
## $ Mean.VisibilityMiles : chr "10" "3" "7" "10" ...
## $ Mean.Wind.SpeedMPH : chr "13" "13" "13" "11" ...
## $ MeanDew.PointF : chr "40" "39" "31" "27" ...
## $ Min.DewpointF : chr "26" "37" "27" "25" ...
## $ Min.Humidity : chr "52" "89" "82" "64" ...
## $ Min.Sea.Level.PressureIn : chr "30.01" "29.43" "29.44" "29.81" ...
## $ Min.TemperatureF : chr "39" "38" "32" "31" ...
## $ Min.VisibilityMiles : chr "10" "1" "1" "7" ...
## $ PrecipitationIn : chr "0.01" "0.28" "0.02" "T" ...
## $ WindDirDegrees : chr "268" "357" "230" "286" ...
## date Events CloudCover Max.Dew.PointF Max.Gust.SpeedMPH
## 1 2014-12-01 Rain 6 46 29
## 2 2014-12-10 Rain 8 45 29
## 3 2014-12-11 Rain-Snow 8 37 28
## 4 2014-12-12 Snow 7 28 21
## 5 2014-12-13 5 28 23
## 6 2014-12-14 4 29 20
## 7 2014-12-15 2 33 21
## 8 2014-12-16 Rain 8 42 10
## 9 2014-12-17 Rain 8 46 26
## 10 2014-12-18 Rain 7 34 30
## 11 2014-12-19 4 25 23
## 12 2014-12-02 Rain-Snow 7 40 29
## 13 2014-12-20 Snow 6 30 26
## 14 2014-12-21 Snow 8 30 20
## 15 2014-12-22 Rain 7 39 22
## 16 2014-12-23 Rain 8 45 25
## 17 2014-12-24 Fog-Rain 8 46 15
## 18 2014-12-25 Rain 6 58 40
## 19 2014-12-26 1 31 25
## 20 2014-12-27 3 34 21
## Max.Humidity Max.Sea.Level.PressureIn Max.TemperatureF Max.VisibilityMiles
## 1 74 30.45 64 10
## 2 100 29.58 48 10
## 3 92 29.81 39 10
## 4 85 29.88 39 10
## 5 75 29.86 42 10
## 6 82 29.91 45 10
## 7 89 30.15 42 10
## 8 96 30.17 44 10
## 9 100 29.91 49 10
## 10 89 29.87 44 10
## 11 69 30.15 37 10
## 12 92 30.71 42 10
## 13 89 30.31 36 10
## 14 85 30.37 36 10
## 15 89 30.4 44 10
## 16 100 30.31 47 10
## 17 100 30.13 46 2
## 18 100 29.96 59 10
## 19 70 30.16 50 10
## 20 70 30.22 52 10
## Max.Wind.SpeedMPH Mean.Humidity Mean.Sea.Level.PressureIn Mean.TemperatureF
## 1 22 63 30.13 52
## 2 23 95 29.5 43
## 3 21 87 29.61 36
## 4 16 75 29.85 35
## 5 17 65 29.82 37
## 6 15 68 29.83 39
## 7 15 75 30.05 37
## 8 8 85 30.09 40
## 9 20 85 29.75 45
## 10 23 73 29.78 40
## 11 17 63 29.98 33
## 12 24 72 30.59 38
## 13 21 79 30.26 32
## 14 16 77 30.32 33
## 15 18 79 30.35 39
## 16 20 91 30.23 45
## 17 13 98 29.9 44
## 18 28 75 29.63 52
## 19 18 60 30.11 44
## 20 17 60 30.14 45
## Mean.VisibilityMiles Mean.Wind.SpeedMPH MeanDew.PointF Min.DewpointF
## 1 10 13 40 26
## 2 3 13 39 37
## 3 7 13 31 27
## 4 10 11 27 25
## 5 10 12 26 24
## 6 10 10 27 25
## 7 10 6 29 27
## 8 9 4 36 30
## 9 6 11 41 32
## 10 10 14 30 26
## 11 10 11 22 20
## 12 8 15 27 17
## 13 10 10 24 20
## 14 9 9 27 25
## 15 10 8 34 25
## 16 5 13 42 37
## 17 1 6 44 41
## 18 8 14 43 29
## 19 10 11 29 28
## 20 10 9 31 29
## Min.Humidity Min.Sea.Level.PressureIn Min.TemperatureF Min.VisibilityMiles
## 1 52 30.01 39 10
## 2 89 29.43 38 1
## 3 82 29.44 32 1
## 4 64 29.81 31 7
## 5 55 29.78 32 10
## 6 53 29.78 33 10
## 7 60 29.91 32 10
## 8 73 29.92 35 5
## 9 70 29.69 41 1
## 10 57 29.71 36 10
## 11 56 29.86 29 10
## 12 51 30.4 33 2
## 13 69 30.17 27 7
## 14 69 30.28 30 6
## 15 69 30.3 33 4
## 16 82 30.16 42 1
## 17 96 29.55 41 0
## 18 49 29.47 44 1
## 19 49 29.99 37 10
## 20 50 30.03 38 10
## PrecipitationIn WindDirDegrees
## 1 0.01 268
## 2 0.28 357
## 3 0.02 230
## 4 T 286
## 5 T 298
## 6 0.00 306
## 7 0.00 324
## 8 T 79
## 9 0.43 311
## 10 0.01 281
## 11 0.00 305
## 12 0.10 62
## 13 T 350
## 14 T 2
## 15 0.05 24
## 16 0.25 63
## 17 0.56 12
## 18 0.14 250
## 19 0.00 255
## 20 0.00 251
# Demonstrate the problem: coercing PrecipitationIn straight to numeric
# yields NA wherever the entry is not a number
as.numeric(weather5[["PrecipitationIn"]])
## [1] 0.01 0.28 0.02 NA NA 0.00 0.00 NA 0.43 0.01 0.00 0.10 NA NA 0.05
## [16] 0.25 0.56 0.14 0.00 0.00 0.01 0.00 0.44 0.00 0.00 0.00 0.11 1.09 0.13 0.03
## [31] 2.90 0.00 0.00 0.00 0.20 0.00 NA 0.12 0.00 0.00 0.15 0.00 0.00 0.00 0.00
## [46] NA 0.00 0.71 0.00 0.10 0.95 0.01 NA 0.62 0.06 0.05 0.57 0.00 0.02 NA
## [61] 0.00 0.01 0.00 0.05 0.01 0.03 0.00 0.23 0.39 0.00 0.02 0.01 0.06 0.78 0.00
## [76] 0.17 0.11 0.00 NA 0.07 0.02 0.00 0.00 0.00 0.00 0.09 NA 0.07 0.37 0.88
## [91] 0.17 0.06 0.01 0.00 0.00 0.80 0.27 0.00 0.14 0.00 0.00 0.01 0.05 0.09 0.00
## [106] 0.00 0.00 0.04 0.80 0.21 0.12 0.00 0.26 NA 0.00 0.02 NA 0.00 0.00 NA
## [121] 0.00 0.00 0.09 0.00 0.00 0.00 0.01 0.00 0.00 0.06 0.00 0.00 0.00 0.61 0.54
## [136] NA 0.00 NA 0.00 0.00 0.10 0.07 0.00 0.03 0.00 0.39 0.00 0.00 0.03 0.26
## [151] 0.09 0.00 0.00 0.00 0.02 0.00 0.00 0.00 NA 0.00 0.00 0.27 0.00 0.00 0.00
## [166] NA 0.00 0.00 NA 0.00 0.00 NA 0.00 0.00 0.00 0.91 0.00 0.02 0.00 0.00
## [181] 0.00 0.00 0.38 0.00 0.00 0.00 NA 0.00 0.40 NA 0.00 0.00 0.00 0.74 0.04
## [196] 1.72 0.00 0.01 0.00 0.00 NA 0.20 1.43 NA 0.00 0.00 0.00 NA 0.09 0.00
## [211] NA NA 0.50 1.12 0.00 0.00 0.00 0.03 NA 0.00 NA 0.14 NA 0.00 NA
## [226] NA 0.00 0.00 0.01 0.00 NA 0.06 0.00 0.00 0.00 0.02 0.00 NA 0.00 0.00
## [241] 0.02 NA 0.15 NA 0.00 0.83 0.00 0.00 0.00 0.08 0.00 0.00 0.14 0.00 0.00
## [256] 0.00 0.63 NA 0.02 NA 0.00 NA 0.00 0.00 0.00 0.00 0.00 0.00 0.49 0.00
## [271] 0.00 0.00 0.00 0.00 0.00 0.17 0.66 0.01 0.38 0.00 0.00 0.00 0.00 0.00 0.00
## [286] 0.00 NA 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.04 0.01 2.46 NA 0.00
## [301] 0.00 0.00 0.20 0.00 NA 0.00 0.00 0.00 0.12 0.00 0.00 NA NA NA 0.00
## [316] 0.08 NA 0.07 NA 0.00 0.00 0.03 0.00 0.00 0.36 0.73 0.01 0.00 0.00 0.00
## [331] 0.00 0.00 0.00 0.00 0.34 NA 0.07 0.54 0.04 0.01 0.00 0.00 0.00 0.00 0.00
## [346] NA 0.00 0.86 0.00 0.30 0.04 0.00 0.00 0.00 0.00 0.21 0.00 0.00 0.00 0.00
## [361] 0.00 0.00 0.00 0.00 0.00 0.14
# Replace T with 0 (T = trace)
weather5$PrecipitationIn <- str_replace(weather5$PrecipitationIn, "T", "0")
# Convert every measurement column, CloudCover through WindDirDegrees by
# position, from character to numeric. Base column assignment is used
# instead of dplyr's deprecated mutate_each()/funs(), so the step does not
# depend on the installed dplyr version.
weather6 <- weather5
num_cols <- seq(
  match("CloudCover", names(weather6)),
  match("WindDirDegrees", names(weather6))
)
weather6[num_cols] <- lapply(weather6[num_cols], as.numeric)
# Look at result
str(weather6)
## 'data.frame': 366 obs. of 23 variables:
## $ date : Date, format: "2014-12-01" "2014-12-10" ...
## $ Events : chr "Rain" "Rain" "Rain-Snow" "Snow" ...
## $ CloudCover : num 6 8 8 7 5 4 2 8 8 7 ...
## $ Max.Dew.PointF : num 46 45 37 28 28 29 33 42 46 34 ...
## $ Max.Gust.SpeedMPH : num 29 29 28 21 23 20 21 10 26 30 ...
## $ Max.Humidity : num 74 100 92 85 75 82 89 96 100 89 ...
## $ Max.Sea.Level.PressureIn : num 30.4 29.6 29.8 29.9 29.9 ...
## $ Max.TemperatureF : num 64 48 39 39 42 45 42 44 49 44 ...
## $ Max.VisibilityMiles : num 10 10 10 10 10 10 10 10 10 10 ...
## $ Max.Wind.SpeedMPH : num 22 23 21 16 17 15 15 8 20 23 ...
## $ Mean.Humidity : num 63 95 87 75 65 68 75 85 85 73 ...
## $ Mean.Sea.Level.PressureIn: num 30.1 29.5 29.6 29.9 29.8 ...
## $ Mean.TemperatureF : num 52 43 36 35 37 39 37 40 45 40 ...
## $ Mean.VisibilityMiles : num 10 3 7 10 10 10 10 9 6 10 ...
## $ Mean.Wind.SpeedMPH : num 13 13 13 11 12 10 6 4 11 14 ...
## $ MeanDew.PointF : num 40 39 31 27 26 27 29 36 41 30 ...
## $ Min.DewpointF : num 26 37 27 25 24 25 27 30 32 26 ...
## $ Min.Humidity : num 52 89 82 64 55 53 60 73 70 57 ...
## $ Min.Sea.Level.PressureIn : num 30 29.4 29.4 29.8 29.8 ...
## $ Min.TemperatureF : num 39 38 32 31 32 33 32 35 41 36 ...
## $ Min.VisibilityMiles : num 10 1 1 7 10 10 10 5 1 10 ...
## $ PrecipitationIn : num 0.01 0.28 0.02 0 0 0 0 0 0.43 0.01 ...
## $ WindDirDegrees : num 268 357 230 286 298 306 324 79 311 281 ...
## [1] 6
## date Events CloudCover Max.Dew.PointF
## Min. :2014-12-01 Length:366 Min. :0.000 Min. :-6.00
## 1st Qu.:2015-03-02 Class :character 1st Qu.:3.000 1st Qu.:32.00
## Median :2015-06-01 Mode :character Median :5.000 Median :47.50
## Mean :2015-06-01 Mean :4.708 Mean :45.48
## 3rd Qu.:2015-08-31 3rd Qu.:7.000 3rd Qu.:61.00
## Max. :2015-12-01 Max. :8.000 Max. :75.00
##
## Max.Gust.SpeedMPH Max.Humidity Max.Sea.Level.PressureIn Max.TemperatureF
## Min. : 0.00 Min. : 39.00 Min. :29.58 Min. :18.00
## 1st Qu.:21.00 1st Qu.: 73.25 1st Qu.:30.00 1st Qu.:42.00
## Median :25.50 Median : 86.00 Median :30.14 Median :60.00
## Mean :26.99 Mean : 85.69 Mean :30.16 Mean :58.93
## 3rd Qu.:31.25 3rd Qu.: 93.00 3rd Qu.:30.31 3rd Qu.:76.00
## Max. :94.00 Max. :1000.00 Max. :30.88 Max. :96.00
## NA's :6
## Max.VisibilityMiles Max.Wind.SpeedMPH Mean.Humidity
## Min. : 2.000 Min. : 8.00 Min. :28.00
## 1st Qu.:10.000 1st Qu.:16.00 1st Qu.:56.00
## Median :10.000 Median :20.00 Median :66.00
## Mean : 9.907 Mean :20.62 Mean :66.02
## 3rd Qu.:10.000 3rd Qu.:24.00 3rd Qu.:76.75
## Max. :10.000 Max. :38.00 Max. :98.00
##
## Mean.Sea.Level.PressureIn Mean.TemperatureF Mean.VisibilityMiles
## Min. :29.49 Min. : 8.00 Min. :-1.000
## 1st Qu.:29.87 1st Qu.:36.25 1st Qu.: 8.000
## Median :30.03 Median :53.50 Median :10.000
## Mean :30.04 Mean :51.40 Mean : 8.861
## 3rd Qu.:30.19 3rd Qu.:68.00 3rd Qu.:10.000
## Max. :30.77 Max. :84.00 Max. :10.000
##
## Mean.Wind.SpeedMPH MeanDew.PointF Min.DewpointF Min.Humidity
## Min. : 4.00 Min. :-11.00 Min. :-18.00 Min. :16.00
## 1st Qu.: 8.00 1st Qu.: 24.00 1st Qu.: 16.25 1st Qu.:35.00
## Median :10.00 Median : 41.00 Median : 35.00 Median :46.00
## Mean :10.68 Mean : 38.96 Mean : 32.25 Mean :48.31
## 3rd Qu.:13.00 3rd Qu.: 56.00 3rd Qu.: 51.00 3rd Qu.:60.00
## Max. :22.00 Max. : 71.00 Max. : 68.00 Max. :96.00
##
## Min.Sea.Level.PressureIn Min.TemperatureF Min.VisibilityMiles PrecipitationIn
## Min. :29.16 Min. :-3.00 Min. : 0.000 Min. :0.0000
## 1st Qu.:29.76 1st Qu.:30.00 1st Qu.: 2.000 1st Qu.:0.0000
## Median :29.94 Median :46.00 Median :10.000 Median :0.0000
## Mean :29.93 Mean :43.33 Mean : 6.716 Mean :0.1016
## 3rd Qu.:30.09 3rd Qu.:60.00 3rd Qu.:10.000 3rd Qu.:0.0400
## Max. :30.64 Max. :74.00 Max. :10.000 Max. :2.9000
##
## WindDirDegrees
## Min. : 1.0
## 1st Qu.:113.0
## Median :222.0
## Mean :200.1
## 3rd Qu.:275.0
## Max. :360.0
##
# Row positions where Max.Gust.SpeedMPH is missing
ind <- which(is.na(weather6[["Max.Gust.SpeedMPH"]]))
# Show the complete records for those rows
weather6[ind, ]
## date Events CloudCover Max.Dew.PointF Max.Gust.SpeedMPH Max.Humidity
## 161 2015-05-18 Fog 6 52 NA 100
## 205 2015-06-03 7 48 NA 93
## 273 2015-08-08 4 61 NA 87
## 275 2015-09-01 1 63 NA 78
## 308 2015-10-12 0 56 NA 89
## 358 2015-11-03 1 44 NA 82
## Max.Sea.Level.PressureIn Max.TemperatureF Max.VisibilityMiles
## 161 30.30 58 10
## 205 30.31 56 10
## 273 30.02 76 10
## 275 30.06 79 10
## 308 29.86 76 10
## 358 30.25 73 10
## Max.Wind.SpeedMPH Mean.Humidity Mean.Sea.Level.PressureIn Mean.TemperatureF
## 161 16 79 30.23 54
## 205 14 82 30.24 52
## 273 14 68 29.99 69
## 275 15 65 30.02 74
## 308 15 65 29.80 64
## 358 16 57 30.13 60
## Mean.VisibilityMiles Mean.Wind.SpeedMPH MeanDew.PointF Min.DewpointF
## 161 8 10 48 43
## 205 10 7 45 43
## 273 10 6 57 54
## 275 10 9 62 59
## 308 10 8 51 48
## 358 10 8 42 40
## Min.Humidity Min.Sea.Level.PressureIn Min.TemperatureF Min.VisibilityMiles
## 161 57 30.12 49 0
## 205 71 30.19 47 10
## 273 49 29.95 61 10
## 275 52 29.96 69 10
## 308 41 29.74 51 10
## 358 31 30.06 47 10
## PrecipitationIn WindDirDegrees
## 161 0 72
## 205 0 90
## 273 0 45
## 275 0 54
## 308 0 199
## 358 0 281
## date Events CloudCover Max.Dew.PointF
## Min. :2014-12-01 Length:366 Min. :0.000 Min. :-6.00
## 1st Qu.:2015-03-02 Class :character 1st Qu.:3.000 1st Qu.:32.00
## Median :2015-06-01 Mode :character Median :5.000 Median :47.50
## Mean :2015-06-01 Mean :4.708 Mean :45.48
## 3rd Qu.:2015-08-31 3rd Qu.:7.000 3rd Qu.:61.00
## Max. :2015-12-01 Max. :8.000 Max. :75.00
##
## Max.Gust.SpeedMPH Max.Humidity Max.Sea.Level.PressureIn Max.TemperatureF
## Min. : 0.00 Min. : 39.00 Min. :29.58 Min. :18.00
## 1st Qu.:21.00 1st Qu.: 73.25 1st Qu.:30.00 1st Qu.:42.00
## Median :25.50 Median : 86.00 Median :30.14 Median :60.00
## Mean :26.99 Mean : 85.69 Mean :30.16 Mean :58.93
## 3rd Qu.:31.25 3rd Qu.: 93.00 3rd Qu.:30.31 3rd Qu.:76.00
## Max. :94.00 Max. :1000.00 Max. :30.88 Max. :96.00
## NA's :6
## Max.VisibilityMiles Max.Wind.SpeedMPH Mean.Humidity
## Min. : 2.000 Min. : 8.00 Min. :28.00
## 1st Qu.:10.000 1st Qu.:16.00 1st Qu.:56.00
## Median :10.000 Median :20.00 Median :66.00
## Mean : 9.907 Mean :20.62 Mean :66.02
## 3rd Qu.:10.000 3rd Qu.:24.00 3rd Qu.:76.75
## Max. :10.000 Max. :38.00 Max. :98.00
##
## Mean.Sea.Level.PressureIn Mean.TemperatureF Mean.VisibilityMiles
## Min. :29.49 Min. : 8.00 Min. :-1.000
## 1st Qu.:29.87 1st Qu.:36.25 1st Qu.: 8.000
## Median :30.03 Median :53.50 Median :10.000
## Mean :30.04 Mean :51.40 Mean : 8.861
## 3rd Qu.:30.19 3rd Qu.:68.00 3rd Qu.:10.000
## Max. :30.77 Max. :84.00 Max. :10.000
##
## Mean.Wind.SpeedMPH MeanDew.PointF Min.DewpointF Min.Humidity
## Min. : 4.00 Min. :-11.00 Min. :-18.00 Min. :16.00
## 1st Qu.: 8.00 1st Qu.: 24.00 1st Qu.: 16.25 1st Qu.:35.00
## Median :10.00 Median : 41.00 Median : 35.00 Median :46.00
## Mean :10.68 Mean : 38.96 Mean : 32.25 Mean :48.31
## 3rd Qu.:13.00 3rd Qu.: 56.00 3rd Qu.: 51.00 3rd Qu.:60.00
## Max. :22.00 Max. : 71.00 Max. : 68.00 Max. :96.00
##
## Min.Sea.Level.PressureIn Min.TemperatureF Min.VisibilityMiles PrecipitationIn
## Min. :29.16 Min. :-3.00 Min. : 0.000 Min. :0.0000
## 1st Qu.:29.76 1st Qu.:30.00 1st Qu.: 2.000 1st Qu.:0.0000
## Median :29.94 Median :46.00 Median :10.000 Median :0.0000
## Mean :29.93 Mean :43.33 Mean : 6.716 Mean :0.1016
## 3rd Qu.:30.09 3rd Qu.:60.00 3rd Qu.:10.000 3rd Qu.:0.0400
## Max. :30.64 Max. :74.00 Max. :10.000 Max. :2.9000
##
## WindDirDegrees
## Min. : 1.0
## 1st Qu.:113.0
## Median :222.0
## Mean :200.1
## 3rd Qu.:275.0
## Max. :360.0
##
# Row position of the record whose Max.Humidity equals 1000
ind <- which(weather6[["Max.Humidity"]] == 1000)
# Show the complete record for that day
weather6[ind, ]
## date Events CloudCover Max.Dew.PointF
## 135 2015-04-21 Fog-Rain-Thunderstorm 6 57
## Max.Gust.SpeedMPH Max.Humidity Max.Sea.Level.PressureIn Max.TemperatureF
## 135 94 1000 29.75 65
## Max.VisibilityMiles Max.Wind.SpeedMPH Mean.Humidity
## 135 10 20 71
## Mean.Sea.Level.PressureIn Mean.TemperatureF Mean.VisibilityMiles
## 135 29.6 56 5
## Mean.Wind.SpeedMPH MeanDew.PointF Min.DewpointF Min.Humidity
## 135 10 49 36 42
## Min.Sea.Level.PressureIn Min.TemperatureF Min.VisibilityMiles
## 135 29.53 46 0
## PrecipitationIn WindDirDegrees
## 135 0.54 184
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.000 8.000 10.000 8.861 10.000 10.000
# Row position of the record whose Mean.VisibilityMiles equals -1
ind <- which(weather6[["Mean.VisibilityMiles"]] == -1)
# Show the complete record
weather6[ind, ]
## date Events CloudCover Max.Dew.PointF Max.Gust.SpeedMPH Max.Humidity
## 192 2015-06-18 5 54 23 72
## Max.Sea.Level.PressureIn Max.TemperatureF Max.VisibilityMiles
## 192 30.14 76 10
## Max.Wind.SpeedMPH Mean.Humidity Mean.Sea.Level.PressureIn Mean.TemperatureF
## 192 17 59 30.04 67
## Mean.VisibilityMiles Mean.Wind.SpeedMPH MeanDew.PointF Min.DewpointF
## 192 -1 10 49 45
## Min.Humidity Min.Sea.Level.PressureIn Min.TemperatureF Min.VisibilityMiles
## 192 46 29.93 57 10
## PrecipitationIn WindDirDegrees
## 192 0 189
## date Events CloudCover Max.Dew.PointF
## Min. :2014-12-01 Length:366 Min. :0.000 Min. :-6.00
## 1st Qu.:2015-03-02 Class :character 1st Qu.:3.000 1st Qu.:32.00
## Median :2015-06-01 Mode :character Median :5.000 Median :47.50
## Mean :2015-06-01 Mean :4.708 Mean :45.48
## 3rd Qu.:2015-08-31 3rd Qu.:7.000 3rd Qu.:61.00
## Max. :2015-12-01 Max. :8.000 Max. :75.00
##
## Max.Gust.SpeedMPH Max.Humidity Max.Sea.Level.PressureIn Max.TemperatureF
## Min. : 0.00 Min. : 39.00 Min. :29.58 Min. :18.00
## 1st Qu.:21.00 1st Qu.: 73.25 1st Qu.:30.00 1st Qu.:42.00
## Median :25.50 Median : 86.00 Median :30.14 Median :60.00
## Mean :26.99 Mean : 83.23 Mean :30.16 Mean :58.93
## 3rd Qu.:31.25 3rd Qu.: 93.00 3rd Qu.:30.31 3rd Qu.:76.00
## Max. :94.00 Max. :100.00 Max. :30.88 Max. :96.00
## NA's :6
## Max.VisibilityMiles Max.Wind.SpeedMPH Mean.Humidity
## Min. : 2.000 Min. : 8.00 Min. :28.00
## 1st Qu.:10.000 1st Qu.:16.00 1st Qu.:56.00
## Median :10.000 Median :20.00 Median :66.00
## Mean : 9.907 Mean :20.62 Mean :66.02
## 3rd Qu.:10.000 3rd Qu.:24.00 3rd Qu.:76.75
## Max. :10.000 Max. :38.00 Max. :98.00
##
## Mean.Sea.Level.PressureIn Mean.TemperatureF Mean.VisibilityMiles
## Min. :29.49 Min. : 8.00 Min. : 1.000
## 1st Qu.:29.87 1st Qu.:36.25 1st Qu.: 8.000
## Median :30.03 Median :53.50 Median :10.000
## Mean :30.04 Mean :51.40 Mean : 8.891
## 3rd Qu.:30.19 3rd Qu.:68.00 3rd Qu.:10.000
## Max. :30.77 Max. :84.00 Max. :10.000
##
## Mean.Wind.SpeedMPH MeanDew.PointF Min.DewpointF Min.Humidity
## Min. : 4.00 Min. :-11.00 Min. :-18.00 Min. :16.00
## 1st Qu.: 8.00 1st Qu.: 24.00 1st Qu.: 16.25 1st Qu.:35.00
## Median :10.00 Median : 41.00 Median : 35.00 Median :46.00
## Mean :10.68 Mean : 38.96 Mean : 32.25 Mean :48.31
## 3rd Qu.:13.00 3rd Qu.: 56.00 3rd Qu.: 51.00 3rd Qu.:60.00
## Max. :22.00 Max. : 71.00 Max. : 68.00 Max. :96.00
##
## Min.Sea.Level.PressureIn Min.TemperatureF Min.VisibilityMiles PrecipitationIn
## Min. :29.16 Min. :-3.00 Min. : 0.000 Min. :0.0000
## 1st Qu.:29.76 1st Qu.:30.00 1st Qu.: 2.000 1st Qu.:0.0000
## Median :29.94 Median :46.00 Median :10.000 Median :0.0000
## Mean :29.93 Mean :43.33 Mean : 6.716 Mean :0.1016
## 3rd Qu.:30.09 3rd Qu.:60.00 3rd Qu.:10.000 3rd Qu.:0.0400
## Max. :30.64 Max. :74.00 Max. :10.000 Max. :2.9000
##
## WindDirDegrees
## Min. : 1.0
## 1st Qu.:113.0
## Median :222.0
## Mean :200.1
## 3rd Qu.:275.0
## Max. :360.0
##
# Lower-case, underscore-separated replacements for the 23 column names,
# given in the same order as the existing columns
new_colnames <- c(
  "date", "events", "cloud_cover",
  "max_dew_point_f", "max_gust_speed_mph", "max_humidity",
  "max_sea_level_pressure_in", "max_temperature_f",
  "max_visibility_miles", "max_wind_speed_mph",
  "mean_humidity", "mean_sea_level_pressure_in", "mean_temperature_f",
  "mean_visibility_miles", "mean_wind_speed_mph", "mean_dew_point_f",
  "min_dew_point_f", "min_humidity", "min_sea_level_pressure_in",
  "min_temperature_f", "min_visibility_miles",
  "precipitation_in", "wind_dir_degrees"
)
names(weather6) <- new_colnames
# Label days with an empty events string as "None"
weather6$events <- replace(weather6$events, weather6$events == "", "None")
# Print the first 6 rows of weather6
head(weather6)
## date events cloud_cover max_dew_point_f max_gust_speed_mph
## 1 2014-12-01 Rain 6 46 29
## 2 2014-12-10 Rain 8 45 29
## 3 2014-12-11 Rain-Snow 8 37 28
## 4 2014-12-12 Snow 7 28 21
## 5 2014-12-13 None 5 28 23
## 6 2014-12-14 None 4 29 20
## max_humidity max_sea_level_pressure_in max_temperature_f max_visibility_miles
## 1 74 30.45 64 10
## 2 100 29.58 48 10
## 3 92 29.81 39 10
## 4 85 29.88 39 10
## 5 75 29.86 42 10
## 6 82 29.91 45 10
## max_wind_speed_mph mean_humidity mean_sea_level_pressure_in
## 1 22 63 30.13
## 2 23 95 29.50
## 3 21 87 29.61
## 4 16 75 29.85
## 5 17 65 29.82
## 6 15 68 29.83
## mean_temperature_f mean_visibility_miles mean_wind_speed_mph mean_dew_point_f
## 1 52 10 13 40
## 2 43 3 13 39
## 3 36 7 13 31
## 4 35 10 11 27
## 5 37 10 12 26
## 6 39 10 10 27
## min_dew_point_f min_humidity min_sea_level_pressure_in min_temperature_f
## 1 26 52 30.01 39
## 2 37 89 29.43 38
## 3 27 82 29.44 32
## 4 25 64 29.81 31
## 5 24 55 29.78 32
## 6 25 53 29.78 33
## min_visibility_miles precipitation_in wind_dir_degrees
## 1 10 0.01 268
## 2 1 0.28 357
## 3 1 0.02 230
## 4 7 0.00 286
## 5 10 0.00 298
## 6 10 0.00 306