1. Import data

Import the data and subset it.

# Load the temperature records from a comma-separated file with a header row.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
data.new1 <- read.csv(file = '201314final.csv', header = TRUE, sep = ",")

library(ggplot2)

# Average temperature as the midpoint of TMAX and TMIN.
# Columns are addressed through `data.new1$...` instead of attach(), so no
# copy of the data frame is left on the search path to mask other objects.
# `t.ave` is still created as a standalone vector in case other code uses it.
t.ave <- (data.new1$TMAX + data.new1$TMIN) / 2
data.new1$t.ave <- t.ave

# Keep the rows with year after 2009 and turn year into a factor so that it
# can serve as a grouping / colour variable in the plots.
set1 <- data.new1[data.new1$year > 2009, ]
set1$year <- as.factor(as.character(set1$year))
head(set1)
##           X    date1 sndp      TMAX      TMIN year month day monthday
## 18146 19993 20100101  559 -26.21634 -23.59803 2010     1   1      101
## 18147 19994 20100102  559 -30.11634 -31.39803 2010     1   2      102
## 18148 19995 20100103  559 -30.71634 -30.19803 2010     1   3      103
## 18149 19996 20100104  559 -25.71634 -28.59803 2010     1   4      104
## 18150 19997 20100105  559 -26.81634 -25.79803 2010     1   5      105
## 18151 19998 20100106  559 -22.41634 -21.89803 2010     1   6      106
##            date2      date5 data.13.max data.13.min   seq     t.ave
## 18146 2010-01-01 2015-01-01   -23.79961   -23.47901 18146 -24.90718
## 18147 2010-01-02 2015-01-02   -24.46379   -24.68626 18147 -30.75718
## 18148 2010-01-03 2015-01-03   -24.94797   -24.72352 18148 -30.45718
## 18149 2010-01-04 2015-01-04   -25.93216   -25.32077 18149 -27.15718
## 18150 2010-01-05 2015-01-05   -26.64634   -26.30803 18150 -26.30718
## 18151 2010-01-06 2015-01-06   -25.87634   -25.52803 18151 -22.15718

2. Line plot

# Line plot of the average temperature against date5, one line per year.
# `set1` is passed as ggplot's data argument, so the names inside aes() are
# looked up in `set1` itself and attach(set1) is not needed.
p <- ggplot(set1, aes(x = date5, y = t.ave, group = year, colour = year)) +
  geom_line()
print(p)

# Subset 2010 for the rolling-mean line plot that follows; the gray area in
# that plot indicates the range (TMAX, TMIN).
# `year` is a factor here, so it is compared against the label "2010".
# The code that uses `set2` addresses its columns as `set2$...` or through
# ggplot's data argument, so the subset is not attach()ed.
set2 <- set1[set1$year == "2010", ]
library(zoo)
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
# Replace TMIN, TMAX and t.ave with centred rolling means over a window of 20
# observations. `fill = NA` pads both ends with NA so every result keeps the
# length of its input; it replaces the deprecated `na.pad = TRUE`.
set2$TMIN <- rollmean(set2$TMIN, 20, align = "center", fill = NA)
set2$TMAX <- rollmean(set2$TMAX, 20, align = "center", fill = NA)
set2$t.ave <- rollmean(set2$t.ave, 20, align = "center", fill = NA)

# Smoothed average as a single line (group = 1 joins all points into one
# path). Bare column names are used inside aes() so they are resolved from
# the data argument and give clean axis titles.
p1 <- ggplot(set2, aes(x = date5, y = t.ave, group = 1)) +
  geom_line()

# Semi-transparent band between the smoothed TMIN and TMAX.
p1 <- p1 + geom_ribbon(aes(ymin = TMIN, ymax = TMAX), alpha = 0.3)
print(p1)
## Warning: Removed 19 rows containing missing values (geom_path).

3. Bar plot

library(reshape2)

# Mean t.ave for every year x month combination (NA values ignored).
# Every column is referenced as `set1$...`, so neither attach() nor with() is
# required.
# Pattern reference: with(mtcars, tapply(mpg, list(cyl, gear), sum))
# NOTE: the name `summary` is kept as in the original script; it shadows
# base::summary as a variable, although calls like summary(x) still find the
# function.
summary <- tapply(set1$t.ave, list(set1$year, set1$month), mean, na.rm = TRUE)

# Reshape the year x month table to long format: Var1 = year, Var2 = month,
# value = mean temperature.
summary <- melt(summary)
summary$Var1 <- as.factor(as.character(summary$Var1))
# Optional: also treat the month column as a factor.
# summary$Var2 <- as.factor(as.character(summary$Var2))

# Dodged bars: one bar per year within each month. Bare column names inside
# aes() are resolved from the data argument.
p2 <- ggplot(summary, aes(x = Var2, y = value, fill = Var1)) +
  geom_bar(stat = "identity", position = "dodge")
print(p2)

4. Dot plot

# Parse date5 (text in year-month-day form) into Date values.
set1$date5 <- as.Date(set1$date5, format = "%Y-%m-%d")

# Scatter of the average temperature coloured by year, with one smoother
# fitted across all years (group = 1 overrides the per-year grouping).
# Bare column names inside aes() are resolved from the data argument.
p3 <- ggplot(set1, aes(x = date5, y = t.ave, colour = year)) +
  geom_point() +
  geom_smooth(aes(group = 1))
print(p3)
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.
## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).