Synopsis

This HTML document reports on the completion of the third week’s assignment. It summarizes the packages I used and displays the code, along with the analysis of the data and its graphical representation.

Packages Used

The following package was used:

library(ggplot2)  #To create graphs in R

Source Code

According to the codebook (Global Summary of the Day (GSOD)), there are four variables in the weather dataset, which are as follows:
1. Month
2. Day
3. Year
4. Average daily temperature

The average daily temperature is computed from 24 hourly temperature readings, and “-99” is used when the data is missing. The lowest average temperature is -2.20 F, whereas the highest average temperature is 89.20 F. The data has been recorded from 1995 to the present.

Data Description

# Download the Cincinnati daily-temperature file and label its four columns.
urlCincyWeather <- "http://academic.udayton.edu/kissock/http/Weather/gsod95-current/OHCINCIN.txt"
dataCincyWeather <- read.table(
  file = urlCincyWeather,
  stringsAsFactors = FALSE
)
names(dataCincyWeather) <- c("Month", "Date", "Year", "AvgTemperature")

# Column types and a preview of each variable
str(object = dataCincyWeather)
## 'data.frame':    7963 obs. of  4 variables:
##  $ Month         : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Date          : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Year          : int  1995 1995 1995 1995 1995 1995 1995 1995 1995 1995 ...
##  $ AvgTemperature: num  41.1 22.2 22.8 14.9 9.5 23.8 31.1 26.9 31.3 31.5 ...
# First six records
head(dataCincyWeather, n = 6L)
##   Month Date Year AvgTemperature
## 1     1    1 1995           41.1
## 2     1    2 1995           22.2
## 3     1    3 1995           22.8
## 4     1    4 1995           14.9
## 5     1    5 1995            9.5
## 6     1    6 1995           23.8
# Last six records
tail(dataCincyWeather, n = 6L)
##      Month Date Year AvgTemperature
## 7958    10   14 2016           54.4
## 7959    10   15 2016           63.2
## 7960    10   16 2016           68.7
## 7961    10   17 2016           71.1
## 7962    10   18 2016           74.4
## 7963    10   19 2016           75.3
# Treat the three calendar columns as categorical by converting each to a factor
dataCincyWeather[c("Month", "Date", "Year")] <- lapply(
  dataCincyWeather[c("Month", "Date", "Year")],
  factor
)

# Number of rows and columns in the data set
c(nrow(dataCincyWeather), ncol(dataCincyWeather))
## [1] 7963    4
# Recode the codebook's missing-value sentinel (-99) as NA
dataCincyWeather$AvgTemperature[dataCincyWeather$AvgTemperature == -99] <- NA

# Count the missing temperatures. is.na() is applied to the column itself;
# wrapping a comparison such as `x == TRUE` inside is.na() only gives the same
# count by accident, because comparing NA with anything yields NA.
sum(is.na(dataCincyWeather$AvgTemperature))
## [1] 14
# Drop the rows whose temperature is missing
dataCincyWeather <- na.omit(dataCincyWeather)

# Summary statistics for every column of the cleaned data set
summary(dataCincyWeather)
##      Month           Date           Year      AvgTemperature 
##  5      : 682   2      : 262   1996   : 366   Min.   :-2.20  
##  7      : 682   3      : 262   2000   : 366   1st Qu.:40.20  
##  1      : 681   4      : 262   2004   : 366   Median :57.10  
##  3      : 681   5      : 262   2012   : 366   Mean   :54.73  
##  8      : 681   6      : 262   1995   : 365   3rd Qu.:70.70  
##  10     : 670   8      : 262   1997   : 365   Max.   :89.20  
##  (Other):3872   (Other):6377   (Other):5755

Graphical Data

# Graph 1: average temperature by day of the month.
# The colour is a fixed value, so it is set on the geom rather than inside
# aes(); inside aes() the string "yellow" is treated as data, which draws the
# points in the default palette colour and adds a legend entry named "yellow".
# ggtitle() is joined with `+` so the title is attached to the plot instead of
# being evaluated as a separate statement that only prints a labels object.
ggplot(data = dataCincyWeather, mapping = aes(x = Date, y = AvgTemperature)) +
  geom_point(color = "yellow") +
  ggtitle("DayWise Average Temperature")
# Graph 2: average temperature by month.
# alpha stays inside aes() because it is mapped to a data column (Month).
# The colour is a fixed value, so it is set outside aes(); inside aes() the
# string "red" is treated as data and produces a legend entry instead of
# colouring the points red.
# ggtitle() is joined with `+` so the title is attached to the plot instead of
# being evaluated as a separate statement that only prints a labels object.
ggplot(data = dataCincyWeather) +
  geom_point(
    mapping = aes(x = Month, y = AvgTemperature, alpha = Month),
    color = "red"
  ) +
  ggtitle("Monthwise Average Temperature")
# Graph 3: distribution of average temperature for each year.
# The outline colour is a fixed value, so it is set on the geom rather than
# inside aes(); inside aes() the string "green" is treated as data and
# produces a legend entry instead of drawing the boxes in green.
# ggtitle() is joined with `+` so the title is attached to the plot instead of
# being evaluated as a separate statement that only prints a labels object.
ggplot(data = dataCincyWeather, mapping = aes(x = Year, y = AvgTemperature)) +
  geom_boxplot(color = "green") +
  ggtitle("Yearwise Average Temperature")