#title: "R Notebook"
setwd("H:/My Drive/Courses/Spring2022/Computational_biology/Project-2_temperature-trend/Data")
alltemp<-read.table("ushcn2014_FLs_52i_tavg.txt", as.is=TRUE,
colClasses="character", sep="$")
## http://cdiac.ess-dive.lbl.gov/ftp/ushcn_v2.5_monthly/ushcn2014_FLs_52i_tavg.txt.gz
stations<-as.numeric(sub("USH00([[:digit:]]+).*", "\\1", alltemp$V1)) #str_replace in tidyverse does the same thing.
year<-as.numeric(sub("USH00[[:digit:]]+ +([[:digit:]]+).*", "\\1", alltemp$V1))
### the readme.txt contains critical information to be able to parse
### the input data
temps<-sub("^.{17}", "", alltemp$V1)
temps<-gsub("(.{5}).{4}", "\\1,", temps) # inserting comma between values
## could grab codes in .{4} portion if they were
## of interest and we were climate scientists
temps<-sub(",(.{5}).{3}$", ",\\1", temps)
temps<-read.csv(textConnection(temps),
header=FALSE, stringsAsFactors=FALSE,
na.strings="-9999", strip.white=TRUE)
rm(alltemp)
## transform all temps to C scale
temps.c<-((temps/10 - 32) * 5/9)
## find probability of each observation based on
## normal distribution parameterized by all data prior
## to the last 30 years
find.pnorm<-function(x)pnorm(x,
mean( x[-((length(x)-29):length(x))], na.rm=TRUE),
sd( x[-((length(x)-29):length(x))],na.rm=TRUE))
annual.temp.p<- by(temps[,13], stations, find.pnorm)
## calculate slopes for C temp ~ year for each station
slopes.c<- by(temps.c[,13], stations,
function(x){year<-1:length(x);coef(lm(x ~ year))[2]})
## retrieve metadata for stations
station.info<-read.table("http://cdiac.ess-dive.lbl.gov/ftp/ushcn_v2.5_monthly/ushcn-stations.txt",
as.is=TRUE, colClasses="character", sep="$", quote="",
comment.char="")
station.info<-paste(gsub("[[:space:]]+", "," ,
sub("^(.{35}).*", "\\1", station.info$V1)),
sub("[[:space:]]+$", "", sub("^.{36}(.{30}).*", "\\1", station.info$V1)), sep=",")
station.info<-read.csv(textConnection(station.info),
header=FALSE, stringsAsFactors=FALSE, quote="")
colnames(station.info)<-c("id", "lat", "long", "elevation",
"state", "sitename")
lm.c.all<-array(0, dim=c(13, nrow(station.info), 2))
# lm for all stations, all 12 months plus annual
for(i in 1:13){
for(j in 1:nrow(station.info)){
lm.c.all[i,j,]<-coef(lm(temps.c[stations == station.info$id[j],i] ~ year[stations == station.info$id[j]]))
}
}
wyoming.temps.c<-temps.c[stations %in% station.info$id[station.info$state == "WY"],]
wyoming.stations<-stations[stations %in% station.info$id[station.info$state == "WY"]]
wyoming.year<-year[stations %in% station.info$id[station.info$state == "WY"]]
wyoming.lm<- lm.c.all[,station.info$state == "WY",]
####################################################
wyoming.temps.c$years <- wyoming.year
wyoming.temps.c$stations <- wyoming.stations
colnames(wyoming.temps.c) <- c('jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec', 'annual', 'years', 'stations')
#Ans:1 #Most of the figure3 is replicated here. Improbable high (red) and low (blue) temperatures are highlighted.
wyoming.temp.p <- annual.temp.p[station.info$state == "WY"]
par(mfrow=c(3,10), mar=c(2,2,2,1))
for(st in 1:length(unique(wyoming.stations))){
plot(wyoming.year[wyoming.stations == unique(wyoming.stations)[st]],
wyoming.temps.c[wyoming.stations == unique(wyoming.stations)[st], 13], type='l',
main=station.info$sitename[station.info$id == unique(wyoming.stations)[st]], cex.main=.5)
abline(wyoming.lm[13,st,1], #intercept
wyoming.lm[13,st,2], #slope
col=ifelse(wyoming.lm[13,st,2]>0, "red", "blue")) #slope
points(wyoming.year[wyoming.stations == unique(wyoming.stations)[st]][unlist(wyoming.temp.p[st]) > 0.975],
wyoming.temps.c[wyoming.stations == unique(wyoming.stations)[st],13][unlist(wyoming.temp.p[st]) > 0.975], col="red")
points(wyoming.year[wyoming.stations == unique(wyoming.stations)[st]][unlist(wyoming.temp.p[st]) < 0.025],
wyoming.temps.c[wyoming.stations == unique(wyoming.stations)[st],13][unlist(wyoming.temp.p[st]) < 0.025], col="blue")
}
#Ans:2 #After adding recent 8 years (2007 to 2014) in dataset, it was found that (from thr graph) in most of the states temperature were increased than previous, however the rate of the increase in temperature was reduced compare to the previous 30 years from 2006. In the graph, we can see the black regression line is steeper than the red.
wy.most.recent <- data.frame()
for(j in 1985:2014){
wy.most.recent<- rbind(wy.most.recent, wyoming.temps.c[wyoming.year == j, ])}
wyrecent <- data.frame()
for(i in 1977:2006){
wyrecent<- rbind(wyrecent, wyoming.temps.c[wyoming.year == i, ])}
par(mfrow=c(3,10), mar=c(2,2,1,0)+0.1)
for(st in 1:length(unique(wyoming.stations))){
plot(wyrecent$years[wyrecent$stations == unique(wyrecent$stations)[st]],
wyrecent[wyrecent$stations == unique(wyrecent$stations)[st], 13], type='l',
main=station.info$sitename[station.info$id == unique(wyoming.stations)[st]], xlim = c(1977, 2014), lwd=2, cex.main=.5)
abline(coef(lm(wyrecent[wyrecent$stations == unique(wyrecent$stations)[st], 13]~wyrecent$years[wyrecent$stations == unique(wyrecent$stations)[st]] )))
lines(wy.most.recent$years[wy.most.recent$stations == unique(wy.most.recent$stations)[st]],
wy.most.recent[wy.most.recent$stations == unique(wy.most.recent$stations)[st], 13], type='l',
main=station.info$sitename[station.info$id == unique(wyoming.stations)[st]], col='red')
abline(coef(lm(wy.most.recent[wy.most.recent$stations == unique(wy.most.recent$stations)[st], 13]~wy.most.recent$years[wy.most.recent$stations == unique(wy.most.recent$stations)[st]] )), col='red')
}
#Ans:3 #If we observe the pattern of temperature change in recent 30 years (1977 - 2006), or the most recent 30 years (1985 - 2014), we find that the temperatures are increasing overall, though the rate of the temperature increase is reduced in the most recent 30 years.
par(mfrow=c(1,2))
plot(wyrecent$years, wyrecent$annual, xlab = "Years (1977-2006)", ylab = "Temperature in C")
abline(lm(wyrecent$annual~wyrecent$years), lwd=2)
plot(wy.most.recent$years, wy.most.recent$annual, xlab = "Years (1985-2014)", ylab = "Temperature in C", col='blue')
abline(lm(wy.most.recent$annual~wy.most.recent$years), lwd=2)
#Ans:4
setwd("H:/My Drive/Courses/Spring2022/Computational_biology/Project-2_temperature-trend/Data")
alltemp<-read.table("ushcn2014_FLs_52i_tavg.txt", as.is=TRUE,
colClasses="character", sep="$")
stations<-as.numeric(sub("USH00([[:digit:]]+).*", "\\1", alltemp$V1)) #str_replace in tidyverse does the same thing.
year<-as.numeric(sub("USH00[[:digit:]]+ +([[:digit:]]+).*", "\\1", alltemp$V1))
temps<-sub("^.{17}", "", alltemp$V1)
temps<-gsub("(.{5}).{4}", "\\1,", temps) # inserting comma between values
temps<-sub(",(.{5}).{3}$", ",\\1", temps)
temps<-read.csv(textConnection(temps),
header=FALSE, stringsAsFactors=FALSE,
na.strings="-9999", strip.white=TRUE)
rm(alltemp)
## transform all temps to C scale
temps.c<-((temps/10 - 32) * 5/9)
temps.c$years <- year
temps.c$stations <- stations
## retrieve metadata for stations
station.info<-read.table("http://cdiac.ess-dive.lbl.gov/ftp/ushcn_v2.5_monthly/ushcn-stations.txt",
as.is=TRUE, colClasses="character", sep="$", quote="",
comment.char="")
station.info<-paste(gsub("[[:space:]]+", "," ,
sub("^(.{35}).*", "\\1", station.info$V1)),
sub("[[:space:]]+$", "", sub("^.{36}(.{30}).*", "\\1", station.info$V1)), sep=",")
station.info<-read.csv(textConnection(station.info),
header=FALSE, stringsAsFactors=FALSE, quote="")
colnames(station.info)<-c("id", "lat", "long", "elevation",
"state", "sitename")
#temp data for all states
states <- unique(station.info$state)
for(i in states){
temp <- paste(i,'temp','c', sep = '.')
assign(temp, temps.c[stations %in% station.info$id[station.info$state == i],])
}
#adding state with all stations
st_station <- data.frame(id = stations)
all.station.states <- merge(st_station, station.info[, c('id', 'state')], by = 'id')
#Adding states to temps.c database
temps.c$states <- all.station.states$state
# Recent (previous 30 years from 2014) temp data in all states
temps.recent <- data.frame()
for(i in 1985:2014){
temps.recent<- rbind(temps.recent, temps.c[temps.c$years == i, ])}
temps.hist <- data.frame()
for(i in 1866:1984){
temps.hist<- rbind(temps.hist, temps.c[temps.c$years == i, ])}
#####################################################################################
#####################################################################################
#Precipitation data
allprec<-read.table("ushcn2014_raw_prcp.txt", as.is=TRUE,
colClasses="character", sep="$")
stations<-as.numeric(sub("USH00([[:digit:]]+).*", "\\1", allprec$V1)) #str_replace in tidyverse does the same thing.
year<-as.numeric(sub("USH00[[:digit:]]+ +([[:digit:]]+).*", "\\1", allprec$V1))
prec<-sub("^.{17}", "", allprec$V1)
prec<-gsub("(.{5}).{4}", "\\1,", prec) # inserting comma between values
prec<-sub(",(.{5}).{3}$", ",\\1", prec)
prec<-read.csv(textConnection(prec),
header=FALSE, stringsAsFactors=FALSE,
na.strings="-9999", strip.white=TRUE)
rm(allprec)
## transform to actual value in inch
prec.inch<-prec/100
rm(prec)
prec.inch$years <- year
prec.inch$stations <- stations
#prec data for all states
states <- unique(station.info$state)
for(i in states){
temp <- paste(i,'prec', sep = '.')
assign(temp, prec.inch[stations %in% station.info$id[station.info$state == i],])
}
#adding state with whole database
st_station <- data.frame(id = stations)
all.station.states <- merge(st_station, station.info[, c('id', 'state')], by = 'id')
#Add states to prec.inch database
prec.inch$states <- all.station.states$state
# Recent (previous 30 years from 2014) precipitation data in all states
prec.recent <- data.frame()
for(i in 1985:2014){
prec.recent<- rbind(prec.recent, prec.inch[prec.inch$years == i, ])}
prec.hist <- data.frame()
for(i in 1799:1984){
prec.hist<- rbind(prec.hist, prec.inch[prec.inch$years == i, ])}
#Temperature in Wyoming compared to other states # From the bar graph of historic and recent temperature in all the states, we find that Wyoming is among the one of the lowest temperature states in the US.
#Historic temperature change vs temperature change in last 30 years in wy and other states. #If we see the historic data in the graphs (black), we observe that in many states including Wyoming, the temperature was decreased to some extend. However if we see the recent data (red), we find that in all the stated including Wyoming but North Dakota the temperature was increased.
barplot(aggregate(V13~states, data=temps.hist, mean, na.rm = TRUE)[,2], main= "Temeperature in all states from 1866 to 1984 ", names.arg = aggregate(V13~states, data=temps.hist, mean, na.rm = TRUE)[,1], las=2, cex.axis = .5, cex.names =.5, ylab = "Temperature in C")
barplot(aggregate(V13~states, data=temps.recent, mean, na.rm = TRUE)[,2], main= "Temeperature in all states from 1985 to 2014 ", names.arg = aggregate(V13~states, data=temps.recent, mean, na.rm = TRUE)[,1], las=2, cex.axis = .5, cex.names =.5, ylab = "Temperature in C")
par(mfrow=c(6,8), mar=c(1.5,1.85,2,.5)+.7)
for(st in 1:length(unique(temps.c$states))){
tmp1 <- temps.hist[temps.hist$states==unique(temps.c$states)[st], ]
tmp2 <- temps.recent[temps.recent$states==unique(temps.c$states)[st], ]
comb1 <- aggregate(V13~years, data=tmp1, mean, na.rm=T)
comb2 <- aggregate(V13~years, data=tmp2, mean, na.rm=T)
plot(comb1$years,comb1$V13,
main= unique(temps.c$states)[st], lwd=2, type = 'l', xlim = c(1866, 2014), xlab = "Years", ylab = "Temperature in C")
abline(lm(comb1$V13~comb1$years))
lines(comb2$years,comb2$V13,
type = 'l', col='red')
abline(lm(comb2$V13~comb2$years), col='red')
}
mtext("Historic temperature change (black) vs temperature change in last 30 years (red) in all states", side = 3, line = -.9, outer = TRUE, cex = .5, font=2, col='blue')
#Precipitation in Wyoming compared to other states # From the bar graph of historic and recent precipitation data in all the states, we find that Wyoming is among the one of the states where the precipitation is lowest in the US.
#Historic precipitation change vs precipitation change in last 30 years in wy and other states #From the graph, we find that historically (black) precipitation was increased in the most of the states. However from the recent data (red), we find that in some states the precipitation increased, and in some states decreased. However in Wyoming and Montana, precipitation did not change much in the last 30 years.
barplot(aggregate(V13~states, data=prec.hist, mean, na.rm = TRUE)[,2], main= "Precipitation in all states from 1799 to 1984 ", names.arg = aggregate(V13~states, data=prec.hist, mean, na.rm = TRUE)[,1], las=2, cex.axis = .5, cex.names =.5, ylab = "Precipitation in inch")
barplot(aggregate(V13~states, data=prec.recent, mean, na.rm = TRUE)[,2], main= "Precipitation in all states from 1985 to 2014 ", names.arg = aggregate(V13~states, data=prec.recent, mean, na.rm = TRUE)[,1], las=2, cex.axis = .5, cex.names =.5, ylab = "Precipitation in inch")
par(mfrow=c(6,8), mar=c(1.5,1.85,2,.5)+.7)
for(st in 1:length(unique(prec.inch$states))){
tmp3 <- prec.hist[prec.hist$states==unique(prec.inch$states)[st], ]
tmp4 <- prec.recent[prec.recent$states==unique(prec.inch$states)[st], ]
comb3 <- aggregate(V13~years, data=tmp3, mean, na.rm=T)
comb4 <- aggregate(V13~years, data=tmp4, mean, na.rm=T)
plot(comb3$years,comb3$V13,
main= unique(prec.inch$states)[st], cex= .5, lwd=2, type = 'l', xlim = c(1866, 2014), xlab = "Year", ylab = "Precipitation in inch")
abline(lm(comb3$V13~comb3$years))
lines(comb4$years,comb4$V13,
type = 'l', col='red')
abline(lm(comb4$V13~comb4$years), col='red')
}
mtext("Historic precipitation change (black) vs precipitation change in last 30 years (red) in wy and other states", side = 3, line = -.9, outer = TRUE, cex = .5, font=2, col='blue')
#Precipitation vs temperature in wyoming vs coasal states in last 30 years. #From the graph we find that in the coastal states, both the temperatures and precipitations are higher than Wyoming. I think from this analysis a client will have a comprehensive idea about the temeperature and precipitation in Wyoming and in other states in US.
coast <- c('WY','FL', 'GA', 'ME', 'CT', 'VA', 'RI', 'MA', 'NC', 'SC', 'OR', 'NJ', 'CA')
par(mfrow=c(4,7), mar=c(1.5,1.5,2,.5)+.7)
for(st in 1:length(coast)){
tmp5 <- temps.recent[temps.recent$states==coast[st], ]
tmp6 <- prec.recent[prec.recent$states==coast[st], ]
comb5 <- aggregate(V13~years, data=tmp5, mean, na.rm=T)
comb6 <- aggregate(V13~years, data=tmp6, mean, na.rm=T)
plot(comb5$years,comb5$V13,
main= coast[st], ylab = 'Temperature in C', lwd=2, type = 'l', ylim = c(0, 25))
abline(lm(comb5$V13~comb5$years))
plot(comb6$years,comb6$V13, main= coast[st],
type = 'l',lwd=2, col='red', ylim = c(5, 70), ylab = 'Precipitation in inch')
abline(lm(comb6$V13~comb6$years), col='red')
}
mtext("Temperature (black) vs precipitation (red) in Wyoming and some coastal areas from 1985 - 2014", side = 3, line = -.9, outer = TRUE, cex = .5, font=2, col='blue')