Welcome to the first Assignment! Try to solve the following exercises, and if you do not remember how to do code certain things, have a look again at the previous scripts. Programming is about learning by doing! So, do not hesitate to look for help in the internet, too. Remember that there is almost always more than just one way to solve a problem in R!
library(viridis)
library(tigris)
library(sp)
library(rgdal)
library(grid)
library(gridExtra)
library(dplyr)
library(reshape2)
library(ggplot2)
Import the file with the yield data that corresponds to the crop assigned to your group from the folder “yield_by_crop”. You find the list of crops for each group in the file “M01_Overview.pptx”. Use the melt() function to re-organize the data so that all years are in one column, and all yield measurements are in one column. Transform the year-column into a numeric format.
dir <- "C:/Users/Max/Desktop/IAMO_neu/eLearning/Module_1_Data_visualization/"
data_raw <- read.csv(paste0(dir, "M01_Part01_Agricultural-Statistics/data/yield_by_crop/pomaceous_fruits.csv"), sep=";")
data <- melt(data_raw, id.vars=c("region", "region_type", "crop", "indicator"))
data <- data[,-c(2:4)]
names(data)[2:3] <- c("year", "yield")
data$year <- as.numeric(substr(data$year, 6, 9))
head(data)
| region | year | yield |
|---|---|---|
| Aragatsotn | 2020 | 113.0 |
| Ararat | 2020 | 186.6 |
| Armavir | 2020 | 120.6 |
| Gegharkunik | 2020 | 96.3 |
| Lori | 2020 | 37.2 |
| Kotayk | 2020 | 53.5 |
Using the function ggplot(), create a grouped bar plot of the yield data of your crop. Each marz should form one group, and each group should consist of three bars that represent the yield from the years 2005, 2010 and 2015. Export your plot as PNG.
data$year <- as.factor(data$year)
data_sel <- filter(data, year %in% c(2005, 2010, 2015))
plot1 <- ggplot(data=data_sel, aes(x=region, y=yield, fill=year)) +
geom_bar(position="dodge", stat="identity") +
ggtitle("Yield of pomaceous fruits") +
xlab("") +
ylab("Yield (tonnes per hectare)") +
theme(axis.text.x = element_text(angle = 45, hjust=1),
text = element_text(size = 20))
plot1
# png(paste(dir, "ex02.png"), height=500, width=700)
# plot1
# dev.off()
Create a multipart/facetted bar plot with ggplot(). Each facet should represent one marz. In each facet, each bar should represent the yield of your crop in one year from 2010 to 2020. Export your plot as PNG.
data_sel2 <- filter(data, year %in% c(2010:2020))
plot2 <- ggplot(data=data_sel2, aes(x=year, y=yield)) +
geom_bar(position="dodge", stat="identity") +
ggtitle("Yield of pomaceous fruits") +
xlab("") +
ylab("Yield (tonnes per hectare)") +
facet_wrap(~region, scales="free") +
theme(axis.text.x = element_text(angle = 45, hjust=1),
#text = element_text(size = 20)
)
plot2
# png(paste(dir, "ex03.png"), height=500, width=700)
# plot2
# dev.off()
Use spplot() to create a marz-level map of the yield of your crop in 2020. Export your map as PNG.
data_sel3 <- filter(data, year == c(2020))
data_sel3$region[data_sel3$region=="Vayots dzor"] <- "Vayotz Dzor"
marzes <- readOGR(paste0(dir, "/shapefiles/ARM_marzes_short_simplified_500.shp"), verbose=F)
marzes_data <- geo_join(marzes, data_sel3, "div_short", "region", how = 'left')
map1 <- spplot(marzes_data, "yield",
col.regions = rev(rocket(21)[6:21]),
main=list(cex=1, label="Yield of pomaceous fruits in 2020"),
colorkey=list(labels=list(cex=0.8)))
map1
# png(paste(dir, "ex04.png"), height=500, width=700)
# map1
# dev.off()
Import the table “all_stats.csv” and select the entries corresponding to the crop assigned to your group. Using spplot() and grid.arrange(), create a multipart figure that consists of four maps: marz-level yield, production amount, sown area and harvested area of your crop averaged for the period 2005 to 2015. Export your map as PNG.
all_stats <- read.csv(paste0(dir, "M01_Part01_Agricultural-Statistics/data/all_stats.csv"))
all_stats$region[all_stats$region=="Vayots dzor"] <- "Vayotz Dzor"
all_stats_sel <- filter(all_stats, crop == "Pomaceous Fruits", region %in% c("00_ARMENIA", "Yerevan c.") == FALSE, year %in% c(2005:2015))
all_stats_sel_mean <- all_stats_sel %>% group_by(region, crop, indicator) %>% summarize(mean=mean(na.omit(value)))
yield <- filter(all_stats_sel_mean, indicator == "Yield")
production <- filter(all_stats_sel_mean, indicator == "Production")
sownarea <- filter(all_stats_sel_mean, indicator == "Sown area")
harvestedarea <- filter(all_stats_sel_mean, indicator == "Harvested area")
marzes_yield <- geo_join(marzes, yield, "div_short", "region", how = 'left')
marzes_production <- geo_join(marzes, production, "div_short", "region", how = 'left')
marzes_sownarea <- geo_join(marzes, sownarea, "div_short", "region", how = 'left')
marzes_harvestedarea <- geo_join(marzes, harvestedarea, "div_short", "region", how = 'left')
map_yield <- spplot(marzes_yield, "mean", col.regions = rev(rocket(21)[6:21]),
main=list(cex=1, label="Yield", colorkey=list(labels=list(cex=0.8))))
map_production <- spplot(marzes_production, "mean", col.regions = rev(rocket(21)[6:21]),
main=list(cex=1, label="Production", colorkey=list(labels=list(cex=0.8))))
map_sownarea <- spplot(marzes_sownarea, "mean", col.regions = rev(rocket(21)[6:21]),
main=list(cex=1, label="Sown area", colorkey=list(labels=list(cex=0.8))))
map_harvestedarea <- spplot(marzes_harvestedarea, "mean", col.regions = rev(rocket(21)[6:21]),
main=list(cex=1, label="Harvested area", colorkey=list(labels=list(cex=0.8))))
grid.arrange(map_yield, map_production, map_sownarea, map_harvestedarea,
ncol=2,
top = textGrob("Pomaceous Fruits from 2005 to 2015",
vjust = 1,
gp = gpar(fontface = "bold", cex = 1.5)))
# png(paste(dir, "ex05.png"), height=500, width=700)
# grid.arrange(map_yield, map_production, map_sownarea, map_harvestedarea,
# ncol=2,
# top = textGrob("Pomaceous Fruits from 2005 to 2015",
# vjust = 1,
# gp = gpar(fontface = "bold", cex = 1.5)))
# dev.off()
Import the file with the phenological observations that corresponds to the crop assigned to your group from the folder “phenology_by_crop”. You find the list of crops for each group in the file “M01_Overview.pptx”. Using the functions group_by() and summarize(), calculate the earliest starting date of each phenological phase of the crop assigned to you, for each possible combination of year and variety.
pear <- read.csv("C:/Users/Max/Desktop/IAMO_neu/eLearning/Module_1_Data_visualization/M01_Part02_Phenological-Observations/data/phenology_by_crop/pear.csv", sep=";")
pear_melted <- melt(pear, id.vars = c("year", "station", "crop", "variety"))
names(pear_melted)[5:6] <- c("phase", "DOY")
pear_melted$phase <- substring(pear_melted$phase, 2)
min_year_variety <- pear_melted %>% group_by(year, variety, phase) %>% summarize(min_DOY=min(DOY)) %>% as.data.frame()
head(min_year_variety)
| year | variety | phase | min_DOY |
|---|---|---|---|
| 1995 | Autumn | 01_Bud_swelling | 67 |
| 1995 | Autumn | 02_Bud_bursting | 75 |
| 1995 | Autumn | 03_Emergence_1st_leaf | 81 |
| 1995 | Autumn | 04_Flower_bud_splitting | 98 |
| 1995 | Autumn | 05_Blossoming | 106 |
| 1995 | Autumn | 06_Cease_blossoming | 120 |
Using ggplot(), create a figure of the DOYs of the harvest dates, with one box plot for each variety. Export your plot as PNG.
pear_melted_harvest <- filter(pear_melted, phase == "09_Harvest")
plot3 <- ggplot(data=pear_melted_harvest, aes(x=variety, y=DOY, fill=variety)) +
geom_jitter(aes(color=variety), size=2, alpha=0.2) +
geom_boxplot(alpha=0.7) +
theme(axis.text.x=element_text(angle=45, hjust=1),
text = element_text(size = 20),
legend.position="none") +
ggtitle("Harvest date of pear") +
xlab("") +
ylab("DOY (day of the year)")
plot3
# png(paste(dir, "ex07.png"), height=500, width=700)
# plot3
# dev.off()
Create a subsample of your data that contains observations of all phenological phases from 2000 onwards. Use ggplot() to create a facetted figure that shows the DOYs of the phenological observations on the y-axis. Each facet should represent one agrometeorological station, and each facet should contain one box plot for each phenological phase. Export your plot as PNG.
pear_melted_sel <- filter(pear_melted, year >= 2000)
plot4 <- ggplot(data=pear_melted_sel, aes(x=phase, y=DOY, fill=phase)) +
geom_jitter(aes(color=phase), size=2, alpha=0.4) +
geom_boxplot(alpha=0.7) +
theme(axis.text.x=element_text(angle=90, hjust=1),
#text = element_text(size = 20),
legend.position="none") +
ggtitle("Phenological stages of pear, by station") +
xlab("") +
ylab("Day of the year (DOY)") +
facet_wrap(~station, scales="free")
plot4
# png(paste(dir, "ex08.png"), height=500, width=700)
# plot4
# dev.off()
Import the table “all_stats.csv” and select the entries corresponding to the crop you analyzed in exercises 01 to 05. Use ggplot() to create a marz-level map of the 2020 sown area. Export your map as PNG.
all_stats_sel2 <- filter(all_stats, crop == "Pomaceous Fruits", region %in% c("00_ARMENIA", "Yerevan c.") == FALSE, year == 2020, indicator == "Sown area")
marzes_fort <- fortify(marzes)
marzes_stats <- geo_join(marzes, all_stats_sel2, "div_short", "region", how = 'left')
marzes_stats@data$id <- c(0:10)
marzes_fort_stats <- merge(marzes_fort, marzes_stats@data, by="id")
map3 <- ggplot(data=marzes_fort_stats, aes(x=long, y=lat, group=group, fill=value)) +
geom_polygon(color="black", size=0.25) +
coord_equal() +
scale_fill_gradientn(colors=viridis(10), name="Sown area (ha)") +
theme(text = element_text(size = 20),
axis.title=element_blank(),
axis.text=element_blank(),
axis.ticks=element_blank()) +
ggtitle("Sown area of pomaceous fruits in 2020")
map3
# png(paste(dir, "ex09.png"), height=500, width=700)
# map3
# dev.off()
Import the table “all_stats.csv” and select the entries corresponding to the crop you analyzed in exercises 01 to 05. Use ggplot() to create a facetted figure with 6 maps. These maps should represent the mean sown area, production and yield across two time periods: 2005 to 2012, and 2013 to 2020. Arrange the 6 maps using the function facet_grid() (not facet_wrap()!). Export your map as PNG.
all_stats_2005_2012 <- filter(all_stats, crop == "Pomaceous Fruits", region %in% c("00_ARMENIA", "Yerevan c.") == FALSE, year %in% c(2005:2012), indicator != "Harvested area")
all_stats_2005_2012_mean <- all_stats_2005_2012 %>% group_by(region, indicator) %>% summarize(mean=mean(value)) %>% as.data.frame()
all_stats_2005_2012_mean$period <- "2005-2012"
all_stats_2013_2020 <- filter(all_stats, crop == "Pomaceous Fruits", region %in% c("00_ARMENIA", "Yerevan c.") == FALSE, year %in% c(2013:2020), indicator != "Harvested area")
all_stats_2013_2020_mean <- all_stats_2013_2020 %>% group_by(region, indicator) %>% summarize(mean=mean(value)) %>% as.data.frame()
all_stats_2013_2020_mean$period <- "2013-2020"
dat <- rbind(all_stats_2005_2012_mean, all_stats_2013_2020_mean)
marzes2 <- marzes_fort_stats[,c(1:7,9)]
plotdata <- left_join(marzes2, dat, by="region")
plotdata <- plotdata[complete.cases(plotdata),]
map4 <- ggplot() +
geom_polygon(data=plotdata, aes(x=long, y=lat, group=group, fill=mean), color="black", size=0.25) +
coord_equal() +
scale_fill_viridis_c(trans = "log") +
facet_grid(indicator~period) +
ggtitle("Pomaceous Fruits") +
xlab("") +
ylab("") +
theme(text = element_text(size = 20),
axis.title=element_blank(),
axis.text=element_blank(),
axis.ticks=element_blank())
map4
# png(paste(dir, "ex10.png"), height=500, width=700)
# map4
# dev.off()