Install the packages if needed. Go to the Packages tab and install the dplyr
and stats packages (you can also use the install.packages
function to install packages from the console, e.g.
install.packages('dplyr')). Then load the packages if
needed:
# Loading tidyverse attaches dplyr, tidyr, ggplot2 and other useful packages.
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
# Set theme_bw() as the default theme for the plots.
theme_set(theme_bw())
Use the read.csv function to load
precip_car.csv. Each row contains the ID, name, elevation and
monthly precipitation (p01…p12) of a weather station:
# Location of the CSV file, relative to the workspace directory (root.dir in
# this case).
file_path <- "tareas/1_tarea_intro_r/precip_car.csv"

# Read the station table into a data frame and print a per-column summary.
precipitation <- read.csv(file = file_path)
summary(object = precipitation)
## Id Nombre Latitud Longitud
## Min. :12010090 Length:242 Min. : 7.748 Min. :-76.72
## 1st Qu.:15052510 Class :character 1st Qu.: 9.287 1st Qu.:-75.39
## Median :25020645 Mode :character Median : 9.972 Median :-74.75
## Mean :21722494 Mean :10.009 Mean :-74.38
## 3rd Qu.:28040240 3rd Qu.:10.718 3rd Qu.:-73.35
## Max. :29065020 Max. :12.345 Max. :-71.27
## Altitud p01 p02 p03
## Min. : 1.00 Min. : 0.1429 Min. : 0.03143 Min. : 0.9756
## 1st Qu.: 23.50 1st Qu.: 8.2610 1st Qu.: 8.03171 1st Qu.: 19.3035
## Median : 67.50 Median : 14.8902 Median : 17.81829 Median : 36.9110
## Mean : 99.71 Mean : 18.7426 Mean : 21.78586 Mean : 41.3556
## 3rd Qu.:120.00 3rd Qu.: 22.4701 3rd Qu.: 28.77378 3rd Qu.: 58.6982
## Max. :900.00 Max. :145.2263 Max. :151.92895 Max. :220.0395
## p04 p05 p06 p07
## Min. : 0.878 Min. : 12.84 Min. : 2.415 Min. : 2.22
## 1st Qu.: 72.887 1st Qu.:124.32 1st Qu.: 92.814 1st Qu.: 83.41
## Median : 99.753 Median :159.95 Median :120.887 Median :114.34
## Mean :100.795 Mean :155.91 Mean :122.514 Mean :117.71
## 3rd Qu.:126.951 3rd Qu.:188.52 3rd Qu.:152.900 3rd Qu.:153.95
## Max. :317.761 Max. :474.00 Max. :411.632 Max. :497.75
## p08 p09 p10 p11
## Min. : 10.12 Min. : 30.76 Min. : 66.67 Min. : 39.04
## 1st Qu.:111.35 1st Qu.:131.12 1st Qu.:142.24 1st Qu.: 94.73
## Median :147.51 Median :161.60 Median :171.33 Median :117.91
## Mean :147.19 Mean :160.12 Mean :182.39 Mean :127.24
## 3rd Qu.:181.84 3rd Qu.:188.14 3rd Qu.:208.37 3rd Qu.:146.72
## Max. :572.68 Max. :508.49 Max. :598.42 Max. :507.58
## p12 Biome Depto
## Min. : 8.117 Length:242 Length:242
## 1st Qu.: 33.123 Class :character Class :character
## Median : 42.861 Mode :character Mode :character
## Mean : 50.057
## 3rd Qu.: 54.410
## Max. :238.295
Improve the data frame by converting the categorical variables (Depto and Biome) from character to factor:
# Convert both categorical columns to factors in a single assignment so that
# summary() tabulates their levels instead of reporting plain character
# vectors.
precipitation[c("Depto", "Biome")] <- lapply(
  precipitation[c("Depto", "Biome")],
  factor
)
summary(precipitation)
## Id Nombre Latitud Longitud
## Min. :12010090 Length:242 Min. : 7.748 Min. :-76.72
## 1st Qu.:15052510 Class :character 1st Qu.: 9.287 1st Qu.:-75.39
## Median :25020645 Mode :character Median : 9.972 Median :-74.75
## Mean :21722494 Mean :10.009 Mean :-74.38
## 3rd Qu.:28040240 3rd Qu.:10.718 3rd Qu.:-73.35
## Max. :29065020 Max. :12.345 Max. :-71.27
##
## Altitud p01 p02 p03
## Min. : 1.00 Min. : 0.1429 Min. : 0.03143 Min. : 0.9756
## 1st Qu.: 23.50 1st Qu.: 8.2610 1st Qu.: 8.03171 1st Qu.: 19.3035
## Median : 67.50 Median : 14.8902 Median : 17.81829 Median : 36.9110
## Mean : 99.71 Mean : 18.7426 Mean : 21.78586 Mean : 41.3556
## 3rd Qu.:120.00 3rd Qu.: 22.4701 3rd Qu.: 28.77378 3rd Qu.: 58.6982
## Max. :900.00 Max. :145.2263 Max. :151.92895 Max. :220.0395
##
## p04 p05 p06 p07
## Min. : 0.878 Min. : 12.84 Min. : 2.415 Min. : 2.22
## 1st Qu.: 72.887 1st Qu.:124.32 1st Qu.: 92.814 1st Qu.: 83.41
## Median : 99.753 Median :159.95 Median :120.887 Median :114.34
## Mean :100.795 Mean :155.91 Mean :122.514 Mean :117.71
## 3rd Qu.:126.951 3rd Qu.:188.52 3rd Qu.:152.900 3rd Qu.:153.95
## Max. :317.761 Max. :474.00 Max. :411.632 Max. :497.75
##
## p08 p09 p10 p11
## Min. : 10.12 Min. : 30.76 Min. : 66.67 Min. : 39.04
## 1st Qu.:111.35 1st Qu.:131.12 1st Qu.:142.24 1st Qu.: 94.73
## Median :147.51 Median :161.60 Median :171.33 Median :117.91
## Mean :147.19 Mean :160.12 Mean :182.39 Mean :127.24
## 3rd Qu.:181.84 3rd Qu.:188.14 3rd Qu.:208.37 3rd Qu.:146.72
## Max. :572.68 Max. :508.49 Max. :598.42 Max. :507.58
##
## p12 Biome Depto
## Min. : 8.117 Sub-Humid Tropical Forests: 5 La Guajira:45
## 1st Qu.: 33.123 Tropical deserts : 17 Cordoba :42
## Median : 42.861 Tropical dry forests :212 Magdalena :37
## Mean : 50.057 Tropical rainforests : 8 Cesar :36
## 3rd Qu.: 54.410 Bolivar :34
## Max. :238.295 Sucre :23
## (Other) :25
# Annual precipitation per station, computed with rowSums: only the twelve
# monthly precipitation columns (p01 ... p12) are summed.
# The columns are selected by name rather than by position (6:17) so the sum
# stays correct if columns are added, removed or reordered in the input file.
monthly_cols <- sprintf("p%02d", 1:12)
precipitation$p_anual <- rowSums(precipitation[monthly_cols])
head(precipitation)
## Id Nombre Latitud Longitud Altitud
## 1 12045010 ARBOLETES [12045010] 8.846944 -76.43194 4
## 2 12015070 AEROPUERTO LOS CEDROS [12015070] 7.816389 -76.71789 41
## 3 12010110 PRADO MAR [12010110] 7.987417 -76.63853 5
## 4 12015020 UNIBAN [12015020] 7.825917 -76.65133 43
## 5 12010090 TRIGANA [12010090] 7.747667 -76.71189 4
## 6 14010100 PINTADA LA [14010100] 10.955111 -74.99500 200
## p01 p02 p03 p04 p05 p06 p07
## 1 72.6317073 53.2756098 64.448780 133.06585 176.6366 155.76829 162.49268
## 2 145.2263158 151.9289474 220.039474 302.97368 330.0342 288.39474 326.23947
## 3 91.3585366 118.3658537 128.648780 202.77073 247.9512 221.45366 202.57805
## 4 124.4073171 111.1000000 154.853659 234.59512 275.4610 241.79756 254.85366
## 5 135.9243902 141.6634146 183.790244 259.66829 312.9659 314.43415 286.25610
## 6 0.1428571 0.8214286 3.071429 23.00357 109.5107 95.42857 69.48929
## p08 p09 p10 p11 p12 Biome Depto
## 1 194.9610 164.2829 175.6976 157.1049 96.85122 Tropical dry forests Antioquia
## 2 283.7211 291.4526 288.5105 294.8237 238.29474 Tropical rainforests Antioquia
## 3 243.4951 218.1707 202.2098 187.5366 173.50488 Tropical rainforests Antioquia
## 4 249.7488 270.7122 265.2293 259.2293 193.31951 Tropical rainforests Antioquia
## 5 294.0927 273.6341 257.4805 251.8878 238.04878 Tropical rainforests Antioquia
## 6 127.1714 184.9643 189.9821 129.7321 33.50000 Tropical dry forests Atlantico
## p_anual
## 1 1607.2171
## 2 3161.6395
## 3 2238.0439
## 4 2635.3073
## 5 2949.8463
## 6 966.8179
# Mean annual precipitation by department, using the aggregate function.
# Parameters:
#   x (first argument by default): the data to be aggregated by groups.
#   by: the classification of all entries in x needed to group them.
#   FUN: the function to apply to the grouped data.
# Naming the elements of the x and by lists sets the output column names
# directly, so no separate colnames() assignment is needed.
mean_anual_prec_depto <- aggregate(
  x = list(mean_anual_precipitation = precipitation$p_anual),
  by = list(Depto = precipitation$Depto),
  FUN = mean
)
mean_anual_prec_depto
## Depto mean_anual_precipitation
## 1 Antioquia 2518.4108
## 2 Atlantico 996.3746
## 3 Bolivar 1490.1788
## 4 Cesar 1415.9017
## 5 Cordoba 1435.9659
## 6 La Guajira 673.1426
## 7 Magdalena 1295.4668
## 8 Sucre 1251.9409
# Mean annual precipitation by biome, using the aggregate function.
# Naming the elements of the x and by lists sets the output column names
# directly, so no separate colnames() assignment is needed.
mean_anual_prec_biome <- aggregate(
  x = list(mean_anual_precipitation = precipitation$p_anual),
  by = list(Biome = precipitation$Biome),
  FUN = mean
)
mean_anual_prec_biome
## Biome mean_anual_precipitation
## 1 Sub-Humid Tropical Forests 2483.0590
## 2 Tropical deserts 379.0881
## 3 Tropical dry forests 1246.8179
## 4 Tropical rainforests 2287.7244
# Box plot of annual precipitation per biome, built with the ggplot2 package.
# The aesthetics are declared once in ggplot() and inherited by the box plot
# layer; the legend is hidden since the fill colour duplicates the x axis.
ggplot(precipitation, aes(x = Biome, y = p_anual, fill = Biome)) +
  geom_boxplot(show.legend = FALSE) +
  labs(
    title = "Mean anual precipitation by biome",
    x = "Biomes",
    y = "Precipitation [mm]"
  )
# Dimensions of the data frame: number of rows, then number of columns.
dim(precipitation)
## [1] 242 20
The data frame has 242 entries (rows) and 20 variables (columns).
Overview points 4 and 5.
Yes. As expected in theory, tropical deserts and tropical dry forests receive less precipitation than sub-humid tropical forests and tropical rainforests.