- Create a random daily temperature data for each year by using
rnorm.
- The data is transformed to obtain the seasonal variation with higher temperature in middle of the year.
- Learn how to use some tidyverse supported functions:
group_by, filter, summarise, %in%, etc.
Create a data sample
Use data_frame to create a normal distributed data of Temperature for 2 stations for 3 years.
library(tidyverse)
library(pander)
## This function create a data sample for a year with the higher temperature in the middle of the year
f.sample = function(nday, mean, sd) {
x = rnorm(nday, mean = mean, sd = sd)
return(x - abs(sort(x)-mean(x)) ^ 1.2)
}
## Creating data for 3 years
df = data_frame(date = seq(as.Date("1993-1-1"), as.Date("1995-12-31"), by = "1 day"),
st1 = c(f.sample(365, 24.0, 4.1), f.sample(365, 24.2, 3.8), f.sample(365, 24.5, 4.5)),
st2 = c(f.sample(365, 26.0, 2.5), f.sample(365, 26.1, 2.6), f.sample(365, 26.4, 2.5)))
df[1:5, ] %>% pander()
| 1993-01-01 |
1.488 |
9.392 |
| 1993-01-02 |
8.903 |
11.52 |
| 1993-01-03 |
8.2 |
14.41 |
| 1993-01-04 |
15.61 |
16.47 |
| 1993-01-05 |
8.789 |
18.81 |
Quick plot to check data
par(mfrow=c(2,2)) ## Define number of rows and columns for the plot
plot(df$date, df$st1, main = "Timeseries of station 1", xlab = "Date", ylab = "Temperature")
plot(df$date, df$st2, main = "Timeseries of station 2", xlab = "Date", ylab = "Temperature")
hist(df$st1, main = "Histogram of station 1", xlab = "Temperature", ylab = "Frequency")
hist(df$st2, main = "Histogram of station 2", xlab = "Temperature", ylab = "Frequency")

Re-organize the data
Use gather to change columns to row => for easier analysis and ploting
df = df %>% gather(stid, temp, st1:st2)
df[1:5, ] %>% pander()
| 1993-01-01 |
st1 |
1.488 |
| 1993-01-02 |
st1 |
8.903 |
| 1993-01-03 |
st1 |
8.2 |
| 1993-01-04 |
st1 |
15.61 |
| 1993-01-05 |
st1 |
8.789 |
Change format of Date to numeric => for easier group
df = df %>%
mutate(year = as.numeric(strftime(date, format = "%Y")),
mon = as.numeric(strftime(date, format = "%m")),
day = as.numeric(strftime(date, format = "%d"))) %>%
select(stid, year, mon, day, temp)
df[1:5, ] %>% pander()
| st1 |
1993 |
1 |
1 |
1.488 |
| st1 |
1993 |
1 |
2 |
8.903 |
| st1 |
1993 |
1 |
3 |
8.2 |
| st1 |
1993 |
1 |
4 |
15.61 |
| st1 |
1993 |
1 |
5 |
8.789 |
Above is the recommended format for most of the analysis.
Simple analysis
Calculate for whole data of each station)
df %>% group_by(stid) %>%
summarise(Mean = mean(temp),
Max = max(temp),
Min = min(temp),
SD = sd(temp),
Q25 = quantile(temp, 0.25),
Q75 = quantile(temp, 0.75)) %>%
pander()
| st1 |
19.9 |
35.27 |
-2.019 |
5.56 |
16.5 |
23.79 |
| st2 |
23.71 |
32.65 |
9.392 |
3.486 |
21.71 |
26.02 |
Calculate for each year of each station
df %>% group_by(stid, year) %>%
summarise(Mean = mean(temp),
Max = max(temp),
Min = min(temp),
SD = sd(temp),
Q25 = quantile(temp, 0.25),
Q75 = quantile(temp, 0.75)) %>%
pander()
| st1 |
1993 |
20.07 |
33.01 |
1.488 |
5.103 |
16.8 |
23.57 |
| st1 |
1994 |
20.23 |
30.38 |
6.436 |
4.69 |
17.07 |
23.87 |
| st1 |
1995 |
19.42 |
35.27 |
-2.019 |
6.671 |
15.87 |
23.86 |
| st2 |
1993 |
23.47 |
32.65 |
9.392 |
3.568 |
21.29 |
25.76 |
| st2 |
1994 |
23.62 |
32.41 |
12.04 |
3.454 |
21.64 |
25.98 |
| st2 |
1995 |
24.04 |
32.43 |
13.62 |
3.419 |
22.17 |
26.4 |
Calculate climatology of summer months for each station
df %>% filter(mon %in% c(6, 7, 8)) %>%
group_by(stid, mon) %>%
summarise(Mean = mean(temp),
Max = max(temp),
Min = min(temp),
SD = sd(temp),
Q25 = quantile(temp, 0.25),
Q75 = quantile(temp, 0.75)) %>%
pander()
| st1 |
6 |
23.28 |
32.89 |
13.83 |
3.87 |
21.26 |
25.74 |
| st1 |
7 |
23.13 |
33.64 |
12.7 |
4.635 |
19.5 |
26.13 |
| st1 |
8 |
22.67 |
31.11 |
12.82 |
4.034 |
20.43 |
25.17 |
| st2 |
6 |
25.75 |
32.37 |
19.59 |
2.945 |
23.76 |
27.19 |
| st2 |
7 |
25.86 |
32.41 |
20.61 |
2.724 |
23.87 |
28.07 |
| st2 |
8 |
25.36 |
32.13 |
18.15 |
2.601 |
23.88 |
27.07 |
Wanting to see climatology of all months for only station 1
df %>% filter(stid == "st1") %>%
group_by(mon) %>%
summarise(Mean = mean(temp),
Max = max(temp),
Min = min(temp),
SD = sd(temp),
Q25 = quantile(temp, 0.25),
Q75 = quantile(temp, 0.75)) %>%
pander()
| 1 |
12.58 |
22.34 |
-2.019 |
5.369 |
9.023 |
16.76 |
| 2 |
17.9 |
27.4 |
7.222 |
4.387 |
14.85 |
20.46 |
| 3 |
19.91 |
31.46 |
7.062 |
4.122 |
17.07 |
22.7 |
| 4 |
21.75 |
30.72 |
12.01 |
3.97 |
19.4 |
24.71 |
| 5 |
23 |
34.02 |
10.61 |
4.04 |
20.22 |
25.74 |
| 6 |
23.28 |
32.89 |
13.83 |
3.87 |
21.26 |
25.74 |
| 7 |
23.13 |
33.64 |
12.7 |
4.635 |
19.5 |
26.13 |
| 8 |
22.67 |
31.11 |
12.82 |
4.034 |
20.43 |
25.17 |
| 9 |
21.94 |
35.27 |
6.304 |
4.467 |
19.53 |
24.74 |
| 10 |
20.65 |
31.5 |
11.94 |
3.741 |
18.42 |
22.75 |
| 11 |
18.45 |
30.02 |
10.5 |
3.858 |
15.87 |
20.96 |
| 12 |
13.57 |
25.81 |
0.3977 |
5.045 |
10.17 |
16.99 |
Bonus
Using ggplot for better visualization. Demo of annual cycle graph
df %>% group_by(stid, mon) %>%
summarise(Mean = mean(temp),
Q25 = quantile(temp, 0.25),
Q75 = quantile(temp, 0.75)) %>%
ggplot() +
geom_col(aes(x = factor(mon), y = Mean, fill = stid),
alpha = 0.6, position = position_dodge()) +
geom_errorbar(aes(x = factor(mon), ymin = Q25, ymax = Q75, group = stid),
width = .5, position = position_dodge(.9)) +
scale_fill_manual(values = c("red", "blue")) +
scale_x_discrete(labels = c("J", "F", "M", "A", "M", "J", "J", "A", "S", "O", "N", "D")) +
scale_y_continuous(limits = c(8, 28), oob = scales::squish) +
labs(title = "Annual cycle of temperature",
subtitle = "Lower & higher bounds of the error bars present Q25 & Q75, respectively",
x = "Month", y = "Mean temperature") +
theme_bw() +
theme(legend.position = "bottom")

LS0tCnRpdGxlOiAiQW5hbHlzaXM6IEJhc2ljICh0aWR5dmVyc2UpIgphdXRob3I6ICJieSBOWFRUTlgiCm91dHB1dDoKICBodG1sX2RvY3VtZW50OiAKICAgIGNvZGVfZG93bmxvYWQ6IFRSVUUKICAgIGNvZGVfZm9sZGluZzogc2hvdwogICAgbnVtYmVyX3NlY3Rpb25zOiBUUlVFCiAgICB0aGVtZTogImRlZmF1bHQiCiAgICB0b2M6IFRSVUUKICAgIHRvY19mbG9hdDogVFJVRQogICAgZGV2OiAnc3ZnJyAgCi0tLQoKPHN0eWxlPgovKiByZXNpemUgdGhlIHdpZGdldCBjb250YWluZXIgKi8KLnBsb3RseSB7IAogIHdpZHRoOiA2MCUgIWltcG9ydGFudDsKICBtYXJnaW46IGF1dG87Cn0KCi8qIGNlbnRlciB0aGUgd2lkZ2V0ICovCmRpdi5zdmctY29udGFpbmVyIHsKICBtYXJnaW46IGF1dG8gIWltcG9ydGFudDsKfQo8L3N0eWxlPgoKCmBgYHtyIHNldHVwLCBpbmNsdWRlPUZBTFNFfQprbml0cjo6b3B0c19jaHVuayRzZXQoZWNobyA9IFRSVUUsIGZpZy5hbGlnbiA9ICJjZW50ZXIiKQpgYGAKCgoqIENyZWF0ZSBhIHJhbmRvbSBkYWlseSB0ZW1wZXJhdHVyZSBkYXRhIGZvciBlYWNoIHllYXIgYnkgdXNpbmcgYHJub3JtYC4KKiBUaGUgZGF0YSBpcyB0cmFuc2Zvcm1lZCB0byBvYnRhaW4gdGhlIHNlYXNvbmFsIHZhcmlhdGlvbiB3aXRoIGhpZ2hlciB0ZW1wZXJhdHVyZSBpbiBtaWRkbGUgb2YgdGhlIHllYXIuCiogTGVhcm4gaG93IHRvIHVzZSBzb21lICp0aWR5dmVyc2UqIHN1cHBvcnRlZCBmdW5jdGlvbnM6IGBncm91cF9ieWAsIGBmaWx0ZXJgLCBgc3VtbWFyaXNlYCwgYCVpbiVgLCBldGMuCgojIENyZWF0ZSBhIGRhdGEgc2FtcGxlCj4gVXNlIGBkYXRhX2ZyYW1lYCB0byBjcmVhdGUgYSBub3JtYWwgZGlzdHJpYnV0ZWQgZGF0YSBvZiBUZW1wZXJhdHVyZSBmb3IgMiBzdGF0aW9ucyBmb3IgMyB5ZWFycy4KCmBgYHtyIG1lc3NhZ2UgPSBGQUxTRSwgd2FybmluZyA9IEZBTFNFfQpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShwYW5kZXIpCgojIyBUaGlzIGZ1bmN0aW9uIGNyZWF0ZSBhIGRhdGEgc2FtcGxlIGZvciBhIHllYXIgd2l0aCB0aGUgaGlnaGVyIHRlbXBlcmF0dXJlIGluIHRoZSBtaWRkbGUgb2YgdGhlIHllYXIKZi5zYW1wbGUgPSBmdW5jdGlvbihuZGF5LCBtZWFuLCBzZCkgewogIHggPSBybm9ybShuZGF5LCBtZWFuID0gbWVhbiwgc2QgPSBzZCkKICByZXR1cm4oeCAtIGFicyhzb3J0KHgpLW1lYW4oeCkpIF4gMS4yKQp9CgojIyBDcmVhdGluZyBkYXRhIGZvciAzIHllYXJzCmRmID0gZGF0YV9mcmFtZShkYXRlID0gc2VxKGFzLkRhdGUoIjE5OTMtMS0xIiksIGFzLkRhdGUoIjE5OTUtMTItMzEiKSwgYnkgPSAiMSBkYXkiKSwKICAgICAgICAgICAgICAgIHN0MSA9IGMoZi5zYW1wbGUoMzY1LCAyNC4wLCA0LjEpLCBmLnNhbXBsZSgzNjUsIDI0LjIsIDMuOCksIGYuc2FtcGxlKDM2NSwgMjQuNSwgNC41KSksCiAgICAgICAgICAgICAgICBzdDIgPSBjKGYuc2FtcGxlKDM2NSwgMjYuMCwgMi41KSwgZi5zYW1wbGUoMzY1LCAyNi4xLCAyLjYpLCBmLnNhbXBsZSgzNjUsIDI2LjQsIDIuNSkpKQoKZGZbMTo1LCBdICU+JSBwYW5kZXIoKQpgYGAKCiMgUXVpY2sgcGxvdCB0byBjaGVjayBkYXRhCgpgYGB7cn0KcGFyKG1mcm93PWMoMiwyKSkgIyMgRGVmaW5lIG51bWJlciBvZiByb3dzIGFuZCBjb2x1bW5zIGZvciB0aGUgcGxvdApwbG90KGRmJGRhdGUsIGRmJHN0MSwgbWFpbiA9ICJUaW1lc2VyaWVzIG9mIHN0YXRpb24gMSIsIHhsYWIgPSAiRGF0ZSIsIHlsYWIgPSAiVGVtcGVyYXR1cmUiKQpwbG90KGRmJGRhdGUsIGRmJHN0MiwgbWFpbiA9ICJUaW1lc2VyaWVzIG9mIHN0YXRpb24gMiIsIHhsYWIgPSAiRGF0ZSIsIHlsYWIgPSAiVGVtcGVyYXR1cmUiKQpoaXN0KGRmJHN0MSwgbWFpbiA9ICJIaXN0b2dyYW0gb2Ygc3RhdGlvbiAxIiwgeGxhYiA9ICJUZW1wZXJhdHVyZSIsIHlsYWIgPSAiRnJlcXVlbmN5IikKaGlzdChkZiRzdDIsIG1haW4gPSAiSGlzdG9ncmFtIG9mIHN0YXRpb24gMiIsIHhsYWIgPSAiVGVtcGVyYXR1cmUiLCB5bGFiID0gIkZyZXF1ZW5jeSIpCmBgYAoKIyBSZS1vcmdhbml6ZSB0aGUgZGF0YQo+IFVzZSBgZ2F0aGVyYCB0byBjaGFuZ2UgY29sdW1ucyB0byByb3cgPT4gZm9yIGVhc2llciBhbmFseXNpcyBhbmQgcGxvdGluZwoKYGBge3IgbWVzc2FnZSA9IEZBTFNFLCB3YXJuaW5nID0gRkFMU0V9CmRmID0gZGYgJT4lIGdhdGhlcihzdGlkLCB0ZW1wLCBzdDE6c3QyKQoKZGZbMTo1LCBdICU+JSBwYW5kZXIoKQpgYGAKCkNoYW5nZSBmb3JtYXQgb2YgRGF0ZSB0byBudW1lcmljID0+IGZvciBlYXNpZXIgZ3JvdXAKCmBgYHtyIG1lc3NhZ2UgPSBGQUxTRSwgd2FybmluZyA9IEZBTFNFfQpkZiA9IGRmICU+JSAgCiAgbXV0YXRlKHllYXIgPSBhcy5udW1lcmljKHN0cmZ0aW1lKGRhdGUsIGZvcm1hdCA9ICIlWSIpKSwKICAgICAgICAgbW9uICA9IGFzLm51bWVyaWMoc3RyZnRpbWUoZGF0ZSwgZm9ybWF0ID0gIiVtIikpLAogICAgICAgICBkYXkgID0gYXMubnVtZXJpYyhzdHJmdGltZShkYXRlLCBmb3JtYXQgPSAiJWQiKSkpICU+JQogIHNlbGVjdChzdGlkLCB5ZWFyLCBtb24sIGRheSwgdGVtcCkKCmRmWzE6NSwgXSAlPiUgcGFuZGVyKCkKYGBgCgo+IEFib3ZlIGlzIHRoZSByZWNvbW1lbmRlZCBmb3JtYXQgZm9yIG1vc3Qgb2YgdGhlIGFuYWx5c2lzLgoKIyBTaW1wbGUgYW5hbHlzaXMKCiMjIENhbGN1bGF0ZSBmb3Igd2hvbGUgZGF0YSBvZiBlYWNoIHN0YXRpb24pCgpgYGB7cn0KZGYgJT4lIGdyb3VwX2J5KHN0aWQpICU+JQogIHN1bW1hcmlzZShNZWFuID0gbWVhbih0ZW1wKSwKICAgICAgICAgICAgTWF4ID0gbWF4KHRlbXApLCAKICAgICAgICAgICAgTWluID0gbWluKHRlbXApLAogICAgICAgICAgICBTRCA9IHNkKHRlbXApLAogICAgICAgICAgICBRMjUgPSBxdWFudGlsZSh0ZW1wLCAwLjI1KSwKICAgICAgICAgICAgUTc1ID0gcXVhbnRpbGUodGVtcCwgMC43NSkpICU+JQogIHBhbmRlcigpCmBgYAoKIyMgQ2FsY3VsYXRlIGZvciBlYWNoIHllYXIgb2YgZWFjaCBzdGF0aW9uCmBgYHtyfQpkZiAlPiUgZ3JvdXBfYnkoc3RpZCwgeWVhcikgJT4lCiAgc3VtbWFyaXNlKE1lYW4gPSBtZWFuKHRlbXApLAogICAgICAgICAgICBNYXggPSBtYXgodGVtcCksIAogICAgICAgICAgICBNaW4gPSBtaW4odGVtcCksCiAgICAgICAgICAgIFNEID0gc2QodGVtcCksCiAgICAgICAgICAgIFEyNSA9IHF1YW50aWxlKHRlbXAsIDAuMjUpLAogICAgICAgICAgICBRNzUgPSBxdWFudGlsZSh0ZW1wLCAwLjc1KSkgJT4lCiAgcGFuZGVyKCkKYGBgCgojIyBDYWxjdWxhdGUgY2xpbWF0b2xvZ3kgb2Ygc3VtbWVyIG1vbnRocyBmb3IgZWFjaCBzdGF0aW9uCmBgYHtyfQpkZiAlPiUgZmlsdGVyKG1vbiAlaW4lIGMoNiwgNywgOCkpICU+JSAKICBncm91cF9ieShzdGlkLCBtb24pICU+JQogICBzdW1tYXJpc2UoTWVhbiA9IG1lYW4odGVtcCksCiAgICAgICAgICAgIE1heCA9IG1heCh0ZW1wKSwgCiAgICAgICAgICAgIE1pbiA9IG1pbih0ZW1wKSwKICAgICAgICAgICAgU0QgPSBzZCh0ZW1wKSwKICAgICAgICAgICAgUTI1ID0gcXVhbnRpbGUodGVtcCwgMC4yNSksCiAgICAgICAgICAgIFE3NSA9IHF1YW50aWxlKHRlbXAsIDAuNzUpKSAlPiUKICBwYW5kZXIoKQpgYGAKCldhbnRpbmcgdG8gc2VlIGNsaW1hdG9sb2d5IG9mIGFsbCBtb250aHMgZm9yIG9ubHkgc3RhdGlvbiAxCmBgYHtyfQpkZiAlPiUgZmlsdGVyKHN0aWQgPT0gInN0MSIpICU+JSAKICBncm91cF9ieShtb24pICU+JQogICBzdW1tYXJpc2UoTWVhbiA9IG1lYW4odGVtcCksCiAgICAgICAgICAgIE1heCA9IG1heCh0ZW1wKSwgCiAgICAgICAgICAgIE1pbiA9IG1pbih0ZW1wKSwKICAgICAgICAgICAgU0QgPSBzZCh0ZW1wKSwKICAgICAgICAgICAgUTI1ID0gcXVhbnRpbGUodGVtcCwgMC4yNSksCiAgICAgICAgICAgIFE3NSA9IHF1YW50aWxlKHRlbXAsIDAuNzUpKSAlPiUKICBwYW5kZXIoKQpgYGAKCiMgQm9udXMKVXNpbmcgYGdncGxvdGAgZm9yIGJldHRlciB2aXN1YWxpemF0aW9uLiBEZW1vIG9mIGFubnVhbCBjeWNsZSBncmFwaApgYGB7cn0KZGYgJT4lIGdyb3VwX2J5KHN0aWQsIG1vbikgJT4lCiAgc3VtbWFyaXNlKE1lYW4gPSBtZWFuKHRlbXApLAogICAgICAgICAgICBRMjUgPSBxdWFudGlsZSh0ZW1wLCAwLjI1KSwKICAgICAgICAgICAgUTc1ID0gcXVhbnRpbGUodGVtcCwgMC43NSkpICU+JQogIGdncGxvdCgpICsKICBnZW9tX2NvbChhZXMoeCA9IGZhY3Rvcihtb24pLCB5ID0gTWVhbiwgZmlsbCA9IHN0aWQpLCAKICAgICAgICAgICBhbHBoYSA9IDAuNiwgcG9zaXRpb24gPSBwb3NpdGlvbl9kb2RnZSgpKSArCiAgZ2VvbV9lcnJvcmJhcihhZXMoeCA9IGZhY3Rvcihtb24pLCB5bWluID0gUTI1LCB5bWF4ID0gUTc1LCBncm91cCA9IHN0aWQpLCAKICAgICAgICAgICAgICAgIHdpZHRoID0gLjUsIHBvc2l0aW9uID0gcG9zaXRpb25fZG9kZ2UoLjkpKSArIAogIHNjYWxlX2ZpbGxfbWFudWFsKHZhbHVlcyA9IGMoInJlZCIsICJibHVlIikpICsKICBzY2FsZV94X2Rpc2NyZXRlKGxhYmVscyA9IGMoIkoiLCAiRiIsICJNIiwgIkEiLCAiTSIsICJKIiwgIkoiLCAiQSIsICJTIiwgIk8iLCAiTiIsICJEIikpICsKICBzY2FsZV95X2NvbnRpbnVvdXMobGltaXRzID0gYyg4LCAyOCksIG9vYiA9IHNjYWxlczo6c3F1aXNoKSArCiAgbGFicyh0aXRsZSA9ICJBbm51YWwgY3ljbGUgb2YgdGVtcGVyYXR1cmUiLAogICAgICAgc3VidGl0bGUgPSAiTG93ZXIgJiBoaWdoZXIgYm91bmRzIG9mIHRoZSBlcnJvciBiYXJzIHByZXNlbnQgUTI1ICYgUTc1LCByZXNwZWN0aXZlbHkiLAogICAgICAgeCA9ICJNb250aCIsIHkgPSAiTWVhbiB0ZW1wZXJhdHVyZSIpICsKICB0aGVtZV9idygpICsKICB0aGVtZShsZWdlbmQucG9zaXRpb24gPSAiYm90dG9tIikKYGBgCgoK