This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

Load & install packages

library(curl)
package ‘curl’ was built under R version 3.3.3
library(devtools)
# Install weatherData from GitHub using the combined "user/repo" form; passing
# the username as a separate argument is deprecated in devtools.
install_github("Ram-N/weatherData")
Username parameter is deprecated. Please use Ram-N/weatherDataSkipping install of 'weatherData' from a github remote, the SHA1 (76401386) has not changed since last install.
  Use `force = TRUE` to force installation
library(weatherData)
#install.packages("weatherData") - the personal weather station data is only available via github version
#package info at https://ram-n.github.io/weatherData/
library(ggplot2)
Stackoverflow is a great place to get help: http://stackoverflow.com/tags/ggplot2.
library(psych)

Attaching package: ‘psych’

The following objects are masked from ‘package:ggplot2’:

    %+%, alpha
library(reshape2)
library(knitr)
library(lattice) #just to illustrate another histogram function
package ‘lattice’ was built under R version 3.3.3

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).

# Check whether records exist for airport code SYD on 2017-02-15 before
# fetching anything; the call also lists the columns available for that date.
checkDataAvailability("SYD", "2017-02-15", station_type = "airportCode") 
Getting data from:
 https://www.wunderground.com/history/airport/SYD/2017/2/15/DailyHistory.html?format=1
[1] 72
The following columns are available for:2017-02-15
 [1] "TimeAEDT"              "TemperatureC"          "Dew_PointC"           
 [4] "Humidity"              "Sea_Level_PressurehPa" "VisibilityKm"         
 [7] "Wind_Direction"        "Wind_SpeedKm_h"        "Gust_SpeedKm_h"       
[10] "Precipitationmm"       "Events"                "Conditions"           
[13] "WindDirDegrees"        "DateUTC"              
[1] 36 14
        V1 V2 V3 V4   V5 V6   V7   V8 V9 V10  V11                V12 V13                 V14
1 12:00 AM 19 17 86 1018 15 West  7.4        Rain Light Rain Showers 260 2017-02-14 13:00:00
2 12:30 AM 19 17 88 1017 10  SSW 14.8  - N/A Rain Light Rain Showers 200 2017-02-14 13:30:00
3  1:00 AM 19 16 83 1017 10  WSW 11.1  - N/A           Mostly Cloudy 240 2017-02-14 14:00:00
4  2:00 AM 19 16 81 1017 NA West 11.1                                260 2017-02-14 15:00:00
5  2:30 AM 19 16 83 1016 10   NW 11.1  - N/A Rain Light Rain Showers 310 2017-02-14 15:30:00
6  3:00 AM 18 15 78 1017  7  NNW 11.1        Rain Light Rain Showers 340 2017-02-14 16:00:00
Checking Data Availability For SYD
Found Records for 2017-02-15
Data is Available
[1] 1
# Fetch the detailed (intra-day) readings for SYD on 2017-02-15.
# With default column options only Time and TemperatureC are returned.
getWeatherForDate(
  station_id = "SYD",
  start_date = "2017-02-15",
  opt_detailed = TRUE
)
Getting data from:
 https://www.wunderground.com/history/airport/SYD/2017/2/15/DailyHistory.html?format=1
[1] 72
The following columns are available for:2017-02-15
 [1] "TimeAEDT"              "TemperatureC"          "Dew_PointC"           
 [4] "Humidity"              "Sea_Level_PressurehPa" "VisibilityKm"         
 [7] "Wind_Direction"        "Wind_SpeedKm_h"        "Gust_SpeedKm_h"       
[10] "Precipitationmm"       "Events"                "Conditions"           
[13] "WindDirDegrees"        "DateUTC"              
[1] 36 14
        V1 V2 V3 V4   V5 V6   V7   V8 V9 V10  V11                V12 V13                 V14
1 12:00 AM 19 17 86 1018 15 West  7.4        Rain Light Rain Showers 260 2017-02-14 13:00:00
2 12:30 AM 19 17 88 1017 10  SSW 14.8  - N/A Rain Light Rain Showers 200 2017-02-14 13:30:00
3  1:00 AM 19 16 83 1017 10  WSW 11.1  - N/A           Mostly Cloudy 240 2017-02-14 14:00:00
4  2:00 AM 19 16 81 1017 NA West 11.1                                260 2017-02-14 15:00:00
5  2:30 AM 19 16 83 1016 10   NW 11.1  - N/A Rain Light Rain Showers 310 2017-02-14 15:30:00
6  3:00 AM 18 15 78 1017  7  NNW 11.1        Rain Light Rain Showers 340 2017-02-14 16:00:00
Checking Data Availability For SYD
Found Records for 2017-02-15
Data is Available
Will be fetching these Columns:
[1] "Time"         "TemperatureC"
Begin getting Daily Data for SYD
SYD 1 2017-02-15 : Fetching 71 Rows with 2 Column(s)
# Same detailed fetch for a different day (2017-02-10).
getWeatherForDate(
  station_id = "SYD",
  start_date = "2017-02-10",
  opt_detailed = TRUE
)
Getting data from:
 https://www.wunderground.com/history/airport/SYD/2017/2/10/DailyHistory.html?format=1
[1] 75
The following columns are available for:2017-02-10
 [1] "TimeAEDT"              "TemperatureC"          "Dew_PointC"           
 [4] "Humidity"              "Sea_Level_PressurehPa" "VisibilityKm"         
 [7] "Wind_Direction"        "Wind_SpeedKm_h"        "Gust_SpeedKm_h"       
[10] "Precipitationmm"       "Events"                "Conditions"           
[13] "WindDirDegrees"        "DateUTC"              
[1] 37 14
        V1 V2 V3 V4   V5    V6  V7   V8 V9 V10 V11   V12 V13                 V14
1 12:00 AM 25 21 72 1011    25 NNE 13.0         NA  Haze  30 2017-02-09 13:00:00
2 12:30 AM 25 21 78 1010 -9999  NE  9.3  - N/A  NA Clear  40 2017-02-09 13:30:00
3  1:00 AM 25 21 78 1010 -9999  NE 11.1  - N/A  NA Clear  50 2017-02-09 14:00:00
4  2:00 AM 24 22 83 1010    NA  NE 16.7         NA        40 2017-02-09 15:00:00
5  2:30 AM 24 22 89 1010 -9999  NE 13.0  - N/A  NA Clear  50 2017-02-09 15:30:00
6  3:00 AM 24 21 83 1010 -9999  NE  9.3  - N/A  NA Clear  40 2017-02-09 16:00:00
Checking Data Availability For SYD
Found Records for 2017-02-10
Data is Available
Will be fetching these Columns:
[1] "Time"         "TemperatureC"
Begin getting Daily Data for SYD
SYD 1 2017-02-10 : Fetching 74 Rows with 2 Column(s)
# Request the summarized weather for SYD on 2017-02-10
# (no output was captured for this call in the notebook).
getSummarizedWeather("SYD", "2017-02-10")

Grab some data

# Fetch detailed readings with every available column for airport code SYD
# over 2017-02-15 to 2017-04-04. TRUE is spelled out because T is an ordinary
# variable that can be reassigned.
SYD_weather <- getWeatherForDate(
  station_id = "SYD",
  start_date = "2017-02-15",
  end_date = "2017-04-04",
  opt_detailed = TRUE,
  opt_all_columns = TRUE
)
Getting data from:
 https://www.wunderground.com/history/airport/SYD/2017/2/15/DailyHistory.html?format=1
[1] 72
The following columns are available for:2017-02-15
 [1] "TimeAEDT"              "TemperatureC"          "Dew_PointC"           
 [4] "Humidity"              "Sea_Level_PressurehPa" "VisibilityKm"         
 [7] "Wind_Direction"        "Wind_SpeedKm_h"        "Gust_SpeedKm_h"       
[10] "Precipitationmm"       "Events"                "Conditions"           
[13] "WindDirDegrees"        "DateUTC"              
[1] 36 14
        V1 V2 V3 V4   V5 V6   V7   V8 V9 V10  V11                V12 V13                 V14
1 12:00 AM 19 17 86 1018 15 West  7.4        Rain Light Rain Showers 260 2017-02-14 13:00:00
2 12:30 AM 19 17 88 1017 10  SSW 14.8  - N/A Rain Light Rain Showers 200 2017-02-14 13:30:00
3  1:00 AM 19 16 83 1017 10  WSW 11.1  - N/A           Mostly Cloudy 240 2017-02-14 14:00:00
4  2:00 AM 19 16 81 1017 NA West 11.1                                260 2017-02-14 15:00:00
5  2:30 AM 19 16 83 1016 10   NW 11.1  - N/A Rain Light Rain Showers 310 2017-02-14 15:30:00
6  3:00 AM 18 15 78 1017  7  NNW 11.1        Rain Light Rain Showers 340 2017-02-14 16:00:00
Getting data from:
 https://www.wunderground.com/history/airport/SYD/2017/4/4/DailyHistory.html?format=1
[1] 77
The following columns are available for:2017-04-04
 [1] "TimeAEST"              "TemperatureC"          "Dew_PointC"           
 [4] "Humidity"              "Sea_Level_PressurehPa" "VisibilityKm"         
 [7] "Wind_Direction"        "Wind_SpeedKm_h"        "Gust_SpeedKm_h"       
[10] "Precipitationmm"       "Events"                "Conditions"           
[13] "WindDirDegrees"        "DateUTC"              
[1] 38 14
        V1 V2 V3 V4   V5 V6  V7   V8   V9 V10  V11                V12 V13                 V14
1 12:00 AM 17 13 67 1028 15  SE 35.2          Rain Light Rain Showers 130 2017-04-03 14:00:00
2 12:30 AM 18 14 77 1027 10 ESE 29.6    - N/A        Scattered Clouds 120 2017-04-03 14:30:00
3  1:00 AM 18 14 77 1027 10  SE 25.9    - N/A        Scattered Clouds 130 2017-04-03 15:00:00
4  2:00 AM 17 14 78 1027 NA ESE 27.8                                  110 2017-04-03 16:00:00
5  2:30 AM 17 13 77 1026  8 ESE 35.2 53.7 N/A Rain Light Rain Showers 120 2017-04-03 16:30:00
6  3:00 AM 18 15 83 1026 10 SSE 25.9    - N/A           Mostly Cloudy 150 2017-04-03 17:00:00
Checking Data Availability For SYD
Found 36 records for 2017-02-15
Found 38 records for 2017-04-04

Data is Available for the interval.

Will be fetching these Columns:
 [1] "Time"                  "TimeAEDT"              "TemperatureC"         
 [4] "Dew_PointC"            "Humidity"              "Sea_Level_PressurehPa"
 [7] "VisibilityKm"          "Wind_Direction"        "Wind_SpeedKm_h"       
[10] "Gust_SpeedKm_h"        "Precipitationmm"       "Events"               
[13] "Conditions"            "WindDirDegrees"        "DateUTC"              
Begin getting Daily Data for SYD
SYD 1 2017-02-15 : Fetching 71 Rows with 15 Column(s)
SYD 2 2017-02-16 : Fetching 69 Rows with 15 Column(s)
SYD 3 2017-02-17 : Fetching 77 Rows with 15 Column(s)
SYD 4 2017-02-18 : Fetching 71 Rows with 15 Column(s)
SYD 5 2017-02-19 : Fetching 74 Rows with 15 Column(s)
SYD 6 2017-02-20 : Fetching 70 Rows with 15 Column(s)
SYD 7 2017-02-21 : Fetching 66 Rows with 15 Column(s)
SYD 8 2017-02-22 : Fetching 67 Rows with 15 Column(s)
SYD 9 2017-02-23 : Fetching 67 Rows with 15 Column(s)
SYD 10 2017-02-24 : Fetching 69 Rows with 15 Column(s)
SYD 11 2017-02-25 : Fetching 90 Rows with 15 Column(s)
SYD 12 2017-02-26 : Fetching 82 Rows with 15 Column(s)
SYD 13 2017-02-27 : Fetching 79 Rows with 15 Column(s)
SYD 14 2017-02-28 : Fetching 69 Rows with 15 Column(s)
SYD 15 2017-03-01 : Fetching 78 Rows with 15 Column(s)
SYD 16 2017-03-02 : Fetching 72 Rows with 15 Column(s)
SYD 17 2017-03-03 : Fetching 78 Rows with 15 Column(s)
SYD 18 2017-03-04 : Fetching 76 Rows with 15 Column(s)
SYD 19 2017-03-05 : Fetching 77 Rows with 15 Column(s)
SYD 20 2017-03-06 : Fetching 69 Rows with 15 Column(s)
SYD 21 2017-03-07 : Fetching 75 Rows with 15 Column(s)
SYD 22 2017-03-08 : Fetching 69 Rows with 15 Column(s)
SYD 23 2017-03-09 : Fetching 66 Rows with 15 Column(s)
SYD 24 2017-03-10 : Fetching 70 Rows with 15 Column(s)
SYD 25 2017-03-11 : Fetching 68 Rows with 15 Column(s)
SYD 26 2017-03-12 : Fetching 72 Rows with 15 Column(s)
SYD 27 2017-03-13 : Fetching 69 Rows with 15 Column(s)
SYD 28 2017-03-14 : Fetching 72 Rows with 15 Column(s)
SYD 29 2017-03-15 : Fetching 72 Rows with 15 Column(s)
SYD 30 2017-03-16 : Fetching 78 Rows with 15 Column(s)
SYD 31 2017-03-17 : Fetching 88 Rows with 15 Column(s)
SYD 32 2017-03-18 : Fetching 80 Rows with 15 Column(s)
SYD 33 2017-03-19 : Fetching 81 Rows with 15 Column(s)
SYD 34 2017-03-20 : Fetching 67 Rows with 15 Column(s)
SYD 35 2017-03-21 : Fetching 72 Rows with 15 Column(s)
SYD 36 2017-03-22 : Fetching 76 Rows with 15 Column(s)
SYD 37 2017-03-23 : Fetching 72 Rows with 15 Column(s)
SYD 38 2017-03-24 : Fetching 67 Rows with 15 Column(s)
SYD 39 2017-03-25 : Fetching 67 Rows with 15 Column(s)
SYD 40 2017-03-26 : Fetching 64 Rows with 15 Column(s)
SYD 41 2017-03-27 : Fetching 71 Rows with 15 Column(s)
SYD 42 2017-03-28 : Fetching 70 Rows with 15 Column(s)
SYD 43 2017-03-29 : Fetching 67 Rows with 15 Column(s)
SYD 44 2017-03-30 : Fetching 81 Rows with 15 Column(s)
SYD 45 2017-03-31 : Fetching 70 Rows with 15 Column(s)
SYD 46 2017-04-01 : Fetching 68 Rows with 15 Column(s)
SYD 47 2017-04-02 : Fetching 76 Rows with 15 Column(s)
SYD 48 2017-04-03 : Fetching 73 Rows with 15 Column(s)
SYD 49 2017-04-04 : Fetching 76 Rows with 15 Column(s)
# Fetch detailed readings with every available column for airport code MEL
# over the same interval used for SYD. TRUE is spelled out because T is an
# ordinary variable that can be reassigned.
# NOTE(review): the data returned for "MEL" has a `Time_0545` column, only a
# handful of rows per day, and snow events, which looks inconsistent with
# Melbourne, Australia. Confirm that this code resolves to the intended
# station (the ICAO code "YMML" may be needed instead).
MEL_weather <- getWeatherForDate(
  station_id = "MEL",
  start_date = "2017-02-15",
  end_date = "2017-04-04",
  opt_detailed = TRUE,
  opt_all_columns = TRUE
)
Getting data from:
 https://www.wunderground.com/history/airport/MEL/2017/2/15/DailyHistory.html?format=1
[1] 5
The following columns are available for:2017-02-15
 [1] "Time_0545"             "TemperatureC"          "Dew_PointC"           
 [4] "Humidity"              "Sea_Level_PressurehPa" "VisibilityKm"         
 [7] "Wind_Direction"        "Wind_SpeedKm_h"        "Gust_SpeedKm_h"       
[10] "Precipitationmm"       "Events"                "Conditions"           
[13] "WindDirDegrees"        "DateUTC"              
[1]  2 14
       V1 V2 V3 V4 V5 V6    V7   V8 V9 V10 V11 V12 V13                 V14
1 8:45 AM  3 -4 50 NA  8  Calm Calm NA  NA  NA  NA  NA 2017-02-15 03:00:00
2 2:45 PM 21 -7  7 NA 12 South 13.0 NA  NA  NA  NA 190 2017-02-15 09:00:00
Getting data from:
 https://www.wunderground.com/history/airport/MEL/2017/4/4/DailyHistory.html?format=1
[1] 6
The following columns are available for:2017-04-04
 [1] "Time_0545"             "TemperatureC"          "Dew_PointC"           
 [4] "Humidity"              "Sea_Level_PressurehPa" "VisibilityKm"         
 [7] "Wind_Direction"        "Wind_SpeedKm_h"        "Gust_SpeedKm_h"       
[10] "Precipitationmm"       "Events"                "Conditions"           
[13] "WindDirDegrees"        "DateUTC"              
[1]  3 14
        V1 V2 V3 V4 V5 V6    V7  V8 V9 V10 V11              V12 V13                 V14
1  5:45 AM  6  4 81 NA  8 South 3.7 NA  NA  NA                  190 2017-04-04 00:00:00
2 11:45 AM 22  5 23 NA 10    SW 1.9 NA  NA  NA Scattered Clouds 220 2017-04-04 06:00:00
3  5:45 PM 19  2 21 NA  8   WSW 3.7 NA  NA  NA    Partly Cloudy 250 2017-04-04 12:00:00
Checking Data Availability For MEL
Found 2 records for 2017-02-15
Found 3 records for 2017-04-04

Data is Available for the interval.

Will be fetching these Columns:
 [1] "Time"                  "Time_0545"             "TemperatureC"         
 [4] "Dew_PointC"            "Humidity"              "Sea_Level_PressurehPa"
 [7] "VisibilityKm"          "Wind_Direction"        "Wind_SpeedKm_h"       
[10] "Gust_SpeedKm_h"        "Precipitationmm"       "Events"               
[13] "Conditions"            "WindDirDegrees"        "DateUTC"              
Begin getting Daily Data for MEL
MEL 1 2017-02-15 : Fetching 4 Rows with 15 Column(s)
MEL 2 2017-02-16 : Fetching 5 Rows with 15 Column(s)
MEL 3 2017-02-17 : Fetching 5 Rows with 15 Column(s)
MEL 4 2017-02-18 : Fetching 5 Rows with 15 Column(s)
MEL 5 2017-02-19 : Fetching 5 Rows with 15 Column(s)
MEL 6 2017-02-20 : Fetching 5 Rows with 15 Column(s)
MEL 7 2017-02-21 : Fetching 5 Rows with 15 Column(s)
MEL 8 2017-02-22 : Fetching 5 Rows with 15 Column(s)
MEL 9 2017-02-23 : Fetching 4 Rows with 15 Column(s)
MEL 10 2017-02-24 : Fetching 5 Rows with 15 Column(s)
MEL 11 2017-02-25 : Fetching 4 Rows with 15 Column(s)
MEL 12 2017-02-26 : Fetching 5 Rows with 15 Column(s)
MEL 13 2017-02-27 : Fetching 5 Rows with 15 Column(s)
MEL 14 2017-02-28 : Fetching 5 Rows with 15 Column(s)
MEL 15 2017-03-01 : Fetching 5 Rows with 15 Column(s)
MEL 16 2017-03-02 : Fetching 4 Rows with 15 Column(s)
MEL 17 2017-03-03 : Fetching 4 Rows with 15 Column(s)
MEL 18 2017-03-04 : Fetching 5 Rows with 15 Column(s)
MEL 19 2017-03-05 : Fetching 4 Rows with 15 Column(s)
MEL 20 2017-03-06 : Fetching 5 Rows with 15 Column(s)
MEL 21 2017-03-07 : Fetching 4 Rows with 15 Column(s)
MEL 22 2017-03-08 : Fetching 4 Rows with 15 Column(s)
MEL 23 2017-03-09 : Fetching 5 Rows with 15 Column(s)
MEL 24 2017-03-10 : Fetching 3 Rows with 15 Column(s)
MEL 25 2017-03-11 : Fetching 4 Rows with 15 Column(s)
MEL 26 2017-03-12 : Fetching 4 Rows with 15 Column(s)
MEL 27 2017-03-13 : Fetching 5 Rows with 15 Column(s)
MEL 28 2017-03-14 : Fetching 3 Rows with 15 Column(s)
MEL 29 2017-03-15 : Fetching 4 Rows with 15 Column(s)
MEL 30 2017-03-16 : Fetching 4 Rows with 15 Column(s)
MEL 31 2017-03-17 : Fetching 5 Rows with 15 Column(s)
MEL 32 2017-03-18 : Fetching 5 Rows with 15 Column(s)
MEL 33 2017-03-19 : Fetching 4 Rows with 15 Column(s)
MEL 34 2017-03-20 : Fetching 5 Rows with 15 Column(s)
MEL 35 2017-03-21 : Fetching 4 Rows with 15 Column(s)
MEL 36 2017-03-22 : Fetching 4 Rows with 15 Column(s)
MEL 37 2017-03-23 : Fetching 5 Rows with 15 Column(s)
MEL 38 2017-03-24 : Fetching 5 Rows with 15 Column(s)
MEL 39 2017-03-25 : Fetching 4 Rows with 15 Column(s)
MEL 40 2017-03-26 : Fetching 5 Rows with 15 Column(s)
MEL 41 2017-03-27 : Fetching 5 Rows with 15 Column(s)
MEL 42 2017-03-28 : Fetching 4 Rows with 15 Column(s)
MEL 43 2017-03-29 : Fetching 5 Rows with 15 Column(s)
MEL 44 2017-03-30 : Fetching 4 Rows with 15 Column(s)
MEL 45 2017-03-31 : Fetching 5 Rows with 15 Column(s)
MEL 46 2017-04-01 : Fetching 4 Rows with 15 Column(s)
MEL 47 2017-04-02 : Fetching 5 Rows with 15 Column(s)
MEL 48 2017-04-03 : Fetching 5 Rows with 15 Column(s)
MEL 49 2017-04-04 : Fetching 5 Rows with 15 Column(s)
#getWeatherForDate(station_id = "ISYDNEY143", start_date = "2017-02-10", end_date = "2017-04-04", opt_detailed=TRUE, opt_all_columns = T, station_type = "ID")

How might we explore some of this data?

# Descriptive statistics for temperature, one row per city
# (first row SYD, second row MEL), rendered as a table.
kable(
  rbind(
    describe(SYD_weather[["TemperatureC"]]),
    describe(MEL_weather[["TemperatureC"]])
  )
)
vars n mean sd median trimmed mad min max range skew kurtosis se
X1 1 3515 -356.99943 1912.3546 22 21.89264 2.9652 -9999 32 10031 -4.842284 21.45387 32.25565
X11 1 221 -78.54751 950.2152 13 11.91525 8.8956 -9999 30 10029 -10.297442 104.51675 63.91839

Let’s have a basic look at the distribution

# Quick base-graphics histograms of the raw temperature readings, one per
# data set. The data still contain the -9999 placeholder at this point.
hist(MEL_weather$TemperatureC)

hist(SYD_weather$TemperatureC)

Ah, there’s something wrong! The -9999 values are placeholders for missing readings, and they swamp the real temperatures. Let’s fix that and get the Melbourne and Sydney data into the same frame.

# Build one long data frame of valid temperatures with a location label.
# Readings that are not above -300 (i.e. the -9999 placeholder) are dropped.
# You could also replace these with NA, but here we're just going to exclude
# the missing data.
SYD_temp <- data.frame(
  temp = as.numeric(unlist(
    subset(SYD_weather, TemperatureC > -300, select = "TemperatureC")
  )),
  loc = "SYD",
  stringsAsFactors = FALSE
)
MEL_temp <- data.frame(
  temp = as.numeric(unlist(
    subset(MEL_weather, TemperatureC > -300, select = "TemperatureC")
  )),
  loc = "MEL",
  stringsAsFactors = FALSE
)
# Stack Sydney first, then Melbourne; temp is already numeric in both.
temps <- rbind(SYD_temp, MEL_temp)

Let’s look at it again

# Overlaid raw-count histograms, one colour per location.
# Ah, what's the problem here?
ggplot(temps, aes(x = temp, fill = loc)) +
  geom_histogram(position = "identity", alpha = 0.5)

Try that again

# Note the use of 'density' on the y axis because we have unequal temperature
# counts in each dataset. Alpha is the transparency level.
ggplot(temps, aes(x = temp, fill = loc)) +
  geom_histogram(aes(y = ..density..), position = "identity", alpha = 0.5)

# The lattice equivalent: one histogram panel per location.
histogram(~ temp | loc, data = temps)

What’s wrong with this?

# Bar chart showing only the mean temperature per location.
ggplot(temps, aes(x = loc, y = temp, fill = loc)) +
  geom_bar(stat = "summary", fun.y = "mean", position = "dodge")

More informative?

# Box plot per location with the mean overlaid as a diamond (shape 5);
# the fill legend is hidden because the x axis already names the locations.
ggplot(temps, aes(x = loc, y = temp, fill = loc)) +
  geom_boxplot() +
  stat_summary(fun.y = mean, geom = "point", shape = 5, size = 4) +
  guides(fill = FALSE)

Couple of useful things - let’s pull the date out to its own value, and this time we’ll replace missing values (-9999) with NA

# Temperature vs dew point for Sydney, before the -9999 placeholders are
# recoded. shape = 1 draws hollow circles.
ggplot(SYD_weather, aes(x = TemperatureC, y = Dew_PointC)) +
  geom_point(shape = 1)

# Recode the -9999 placeholder as NA everywhere in both data sets.
SYD_weather[SYD_weather == -9999] <- NA
MEL_weather[MEL_weather == -9999] <- NA

# Pull the calendar date out of the DateUTC timestamp into its own column.
SYD_weather[["date"]] <- as.Date(SYD_weather[["DateUTC"]])
MEL_weather[["date"]] <- as.Date(MEL_weather[["DateUTC"]])

# Redraw the temperature vs dew point scatter now that placeholders are NA.
# shape = 1 draws hollow circles.
ggplot(SYD_weather, aes(x = TemperatureC, y = Dew_PointC)) +
  geom_point(shape = 1)

What if we want to explore the relationship between Dew_PointC and other features? For guidance on choosing between a scatter chart and a line chart, see https://support.office.com/en-us/article/Present-your-data-in-a-scatter-chart-or-a-line-chart-4570a80f-599a-4d6b-a155-104a9018b86e

One way you might be tempted to do this…

# Keep the rows that have a humidity value, with just the two measures and
# the date, then coerce both measures to numeric (non-numeric entries become
# NA, with a warning).
bad_example <- SYD_weather[
  !is.na(SYD_weather$Humidity),
  c("Humidity", "Dew_PointC", "date")
]
bad_example$Humidity <- as.numeric(bad_example$Humidity)
bad_example$Dew_PointC <- as.numeric(bad_example$Dew_PointC)
NAs introduced by coercion
# Average every measure per date.
bad_example <- aggregate(. ~ date, data = bad_example, FUN = mean)
# Convert to long format: one row per (date, variable) pair.
bad_example <- melt(bad_example, id.vars = "date")
# One line (with points on top) per variable over time.
ggplot(
  bad_example,
  aes(x = date, y = value, colour = variable, group = variable)
) +
  geom_line() +
  geom_point()

# Is date an important variable in this analysis? Does the scaling of the data give us the best available insight into relationships of paired values? Is the use of a line to join datapoints appropriate given missing data?

A better way?

# Plot the paired values directly: humidity against dew point.
# shape = 1 draws hollow circles.
ggplot(SYD_weather, aes(x = Humidity, y = Dew_PointC)) +
  geom_point(shape = 1)

# Pearson correlation test between humidity and dew point. Both columns are
# coerced to numeric first; entries that cannot be parsed become NA, which is
# what triggers the coercion warning.
cor.test(as.numeric(SYD_weather$Humidity),as.numeric(SYD_weather$Dew_PointC))
NAs introduced by coercion

    Pearson's product-moment correlation

data:  as.numeric(SYD_weather$Humidity) and as.numeric(SYD_weather$Dew_PointC)
t = 45.252, df = 3380, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.5927923 0.6347879
sample estimates:
      cor 
0.6142248 

Ok what if we want to look at how weather varies over time and place?

# Label each data set with its location.
SYD_weather[["loc"]] <- "Sydney"
MEL_weather[["loc"]] <- "Melbourne"

# Stack the columns both data sets share (Sydney rows first, then Melbourne).
weather <- do.call(
  rbind,
  lapply(
    list(SYD_weather, MEL_weather),
    `[`,
    c(
      "TemperatureC", "Dew_PointC", "Humidity", "Wind_SpeedKm_h",
      "Precipitationmm", "Events", "Conditions", "DateUTC", "loc"
    )
  )
)

# Two-digit month taken from the DateUTC timestamp.
weather[["month"]] <- format(as.Date(weather[["DateUTC"]]), "%m")

# Temperature by month, one panel per location; the diamond marks the mean.
ggplot(weather, aes(x = month, y = TemperatureC, fill = loc)) +
  geom_boxplot() +
  stat_summary(fun.y = mean, geom = "point", shape = 5, size = 4) +
  guides(fill = FALSE) +
  facet_wrap(~loc)

Or at how weather events vary by place

# Which distinct event labels occur?
unique(weather[["Events"]])
[1] "Rain"              ""                  NA                  "Thunderstorm"     
[5] "Rain-Thunderstorm" "Snow"              "Hail-Thunderstorm"
# Which distinct condition labels occur?
unique(weather[["Conditions"]])
 [1] "Light Rain Showers"           ""                            
 [3] "Mostly Cloudy"                "Rain Showers"                
 [5] "Partly Cloudy"                "Scattered Clouds"            
 [7] "Clear"                        "Unknown"                     
 [9] "Haze"                         "Light Thunderstorms and Rain"
[11] "Thunderstorms and Rain"       "Light Rain"                  
[13] "Heavy Rain Showers"           "Heavy Thunderstorms and Rain"
[15] "Smoke"                        "Light Drizzle"               
[17] "Drizzle"                      "Thunderstorm"                
[19] "Heavy Rain"                   "Rain"                        
[21] "Overcast"                     "Heavy Drizzle"               
[23] NA                             "Heavy Snow"                  
[25] "Light Snow"                   "Thunderstorms with Hail"     
# Cross-tabulate condition labels against location.
table(weather[["Conditions"]], weather[["loc"]])
                              
                               Melbourne Sydney
                                      49    762
  Clear                                0    394
  Drizzle                              2      6
  Haze                                 1     42
  Heavy Drizzle                        0      1
  Heavy Rain                           0      2
  Heavy Rain Showers                   0     25
  Heavy Snow                           2      0
  Heavy Thunderstorms and Rain         0      5
  Light Drizzle                        4     66
  Light Rain                           1     56
  Light Rain Showers                   0    413
  Light Snow                           2      0
  Light Thunderstorms and Rain         0     15
  Mostly Cloudy                        5    977
  Overcast                             4      9
  Partly Cloudy                       33    428
  Rain                                 0     13
  Rain Showers                         0    108
  Scattered Clouds                    77    226
  Smoke                                0      1
  Thunderstorm                         0      4
  Thunderstorms and Rain               0      4
  Thunderstorms with Hail              1      0
  Unknown                              0     11
# One row per distinct (condition, timestamp, location) combination.
weather_con <- unique(weather[c("Conditions", "DateUTC", "loc")])

# Side-by-side counts of each condition per location; x labels are rotated
# so the long condition names stay readable.
ggplot(weather_con, aes(x = Conditions, fill = loc)) +
  geom_bar(position = position_dodge()) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

# One row per distinct (event, timestamp, location) combination.
weather_event <- unique(weather[c("Events", "DateUTC", "loc")])

# Side-by-side counts of each event type per location, with rotated x labels.
ggplot(weather_event, aes(x = Events, fill = loc)) +
  geom_bar(position = position_dodge()) +
  #scale_y_continuous(labels=scales::percent) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

We’ve often seen students refer to ‘average mood’. Sometimes this might make sense, but this is an analogous example…

# Let's take the weather event data, and code it from best ('no event' = 6)
# to worst ('Snow' = 1). The labels are listed worst-to-best so that each
# label's position in the vector is its numeric code.
event_worst_to_best <- c(
  "Snow",               # 1
  "Hail-Thunderstorm",  # 2
  "Rain-Thunderstorm",  # 3
  "Thunderstorm",       # 4
  "Rain",               # 5
  ""                    # 6: no event recorded
)

# Read the source column by its exact name (`Events`) and write the numeric
# code to a separate `Event` column. Missing events (NA) stay NA.
weather_event$Event <- as.numeric(
  match(weather_event$Events, event_worst_to_best)
)

# Box plot of the coded events per location; the diamond marks the mean of
# the codes, which is the questionable 'average' this example is about.
ggplot(weather_event, aes(x = loc, y = Event, fill = loc)) +
  geom_boxplot() +
  stat_summary(fun.y = mean, geom = "point", shape = 5, size = 4)

