library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.1 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.3 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Location of the TidyTuesday (2020-09-01) key crop yields CSV.
url <- "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-01/key_crop_yields.csv"

# Download and parse the CSV into a tibble; readr guesses the column types
# and reports them in the message recorded below.
df_crop <- read_csv(file = url)
## Rows: 13075 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Entity, Code
## dbl (12): Year, Wheat (tonnes per hectare), Rice (tonnes per hectare), Maize...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Show only the Entity, Year, Potatoes, and Cassava columns.
df_crop |>
  select(
    Entity,
    Year,
    `Potatoes (tonnes per hectare)`,
    `Cassava (tonnes per hectare)`
  )
## # A tibble: 13,075 × 4
## Entity Year `Potatoes (tonnes per hectare)` Cassava (tonnes per hecta…¹
## <chr> <dbl> <dbl> <dbl>
## 1 Afghanistan 1961 8.67 NA
## 2 Afghanistan 1962 7.67 NA
## 3 Afghanistan 1963 8.13 NA
## 4 Afghanistan 1964 8.6 NA
## 5 Afghanistan 1965 8.8 NA
## 6 Afghanistan 1966 9.07 NA
## 7 Afghanistan 1967 9.8 NA
## 8 Afghanistan 1968 10 NA
## 9 Afghanistan 1969 10.2 NA
## 10 Afghanistan 1970 9.54 NA
## # ℹ 13,065 more rows
## # ℹ abbreviated name: ¹`Cassava (tonnes per hectare)`
# Drop the Soybeans, Beans, and Peas columns and keep everything else.
df_crop |>
  select(
    !c(
      `Soybeans (tonnes per hectare)`,
      `Beans (tonnes per hectare)`,
      `Peas (tonnes per hectare)`
    )
  )
## # A tibble: 13,075 × 11
## Entity Code Year `Wheat (tonnes per hectare)` Rice (tonnes per hecta…¹
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 Afghanistan AFG 1961 1.02 1.52
## 2 Afghanistan AFG 1962 0.974 1.52
## 3 Afghanistan AFG 1963 0.832 1.52
## 4 Afghanistan AFG 1964 0.951 1.73
## 5 Afghanistan AFG 1965 0.972 1.73
## 6 Afghanistan AFG 1966 0.867 1.52
## 7 Afghanistan AFG 1967 1.12 1.92
## 8 Afghanistan AFG 1968 1.16 1.95
## 9 Afghanistan AFG 1969 1.19 1.98
## 10 Afghanistan AFG 1970 0.956 1.81
## # ℹ 13,065 more rows
## # ℹ abbreviated name: ¹`Rice (tonnes per hectare)`
## # ℹ 6 more variables: `Maize (tonnes per hectare)` <dbl>,
## # `Potatoes (tonnes per hectare)` <dbl>,
## # `Cassava (tonnes per hectare)` <dbl>, `Barley (tonnes per hectare)` <dbl>,
## # `Cocoa beans (tonnes per hectare)` <dbl>,
## # `Bananas (tonnes per hectare)` <dbl>
# Inspect the structure of the crop-yield table: column names, column types,
# and the first few values of each column.
# `df_crop` was already read from `url` earlier in this script and has not been
# reassigned since, so the CSV is not downloaded a second time here.
glimpse(df_crop)
## Rows: 13,075
## Columns: 14
## $ Entity <chr> "Afghanistan", "Afghanistan", "Afgh…
## $ Code <chr> "AFG", "AFG", "AFG", "AFG", "AFG", …
## $ Year <dbl> 1961, 1962, 1963, 1964, 1965, 1966,…
## $ `Wheat (tonnes per hectare)` <dbl> 1.0220, 0.9735, 0.8317, 0.9510, 0.9…
## $ `Rice (tonnes per hectare)` <dbl> 1.5190, 1.5190, 1.5190, 1.7273, 1.7…
## $ `Maize (tonnes per hectare)` <dbl> 1.4000, 1.4000, 1.4260, 1.4257, 1.4…
## $ `Soybeans (tonnes per hectare)` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ `Potatoes (tonnes per hectare)` <dbl> 8.6667, 7.6667, 8.1333, 8.6000, 8.8…
## $ `Beans (tonnes per hectare)` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ `Peas (tonnes per hectare)` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ `Cassava (tonnes per hectare)` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ `Barley (tonnes per hectare)` <dbl> 1.0800, 1.0800, 1.0800, 1.0857, 1.0…
## $ `Cocoa beans (tonnes per hectare)` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ `Bananas (tonnes per hectare)` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA,…
# In which years was Indonesia's rice yield below 2 tonnes per hectare?
# Both conditions must hold for a row to be kept.
data_filter <- df_crop |>
  filter(
    Entity == "Indonesia",
    `Rice (tonnes per hectare)` < 2
  )
data_filter
## # A tibble: 7 × 14
## Entity Code Year `Wheat (tonnes per hectare)` `Rice (tonnes per hectare)`
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 Indonesia IDN 1961 NA 1.76
## 2 Indonesia IDN 1962 NA 1.79
## 3 Indonesia IDN 1963 NA 1.72
## 4 Indonesia IDN 1964 NA 1.76
## 5 Indonesia IDN 1965 NA 1.77
## 6 Indonesia IDN 1966 NA 1.77
## 7 Indonesia IDN 1967 NA 1.76
## # ℹ 9 more variables: `Maize (tonnes per hectare)` <dbl>,
## # `Soybeans (tonnes per hectare)` <dbl>,
## # `Potatoes (tonnes per hectare)` <dbl>, `Beans (tonnes per hectare)` <dbl>,
## # `Peas (tonnes per hectare)` <dbl>, `Cassava (tonnes per hectare)` <dbl>,
## # `Barley (tonnes per hectare)` <dbl>,
## # `Cocoa beans (tonnes per hectare)` <dbl>,
## # `Bananas (tonnes per hectare)` <dbl>
# Which countries had a wheat yield above 5 tonnes per hectare from the year
# 2000 onwards? One row is returned per matching entity-year combination.
data_filter <- df_crop |>
  filter(
    Year >= 2000,
    `Wheat (tonnes per hectare)` > 5
  )
data_filter
## # A tibble: 424 × 14
## Entity Code Year `Wheat (tonnes per hectare)` `Rice (tonnes per hectare)`
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 Austria AUT 2001 5.24 NA
## 2 Austria AUT 2004 5.92 NA
## 3 Austria AUT 2005 5.03 NA
## 4 Austria AUT 2008 5.69 NA
## 5 Austria AUT 2010 5.01 NA
## 6 Austria AUT 2011 5.85 NA
## 7 Austria AUT 2013 5.37 NA
## 8 Austria AUT 2014 5.92 NA
## 9 Austria AUT 2015 5.70 NA
## 10 Austria AUT 2016 6.25 NA
## # ℹ 414 more rows
## # ℹ 9 more variables: `Maize (tonnes per hectare)` <dbl>,
## # `Soybeans (tonnes per hectare)` <dbl>,
## # `Potatoes (tonnes per hectare)` <dbl>, `Beans (tonnes per hectare)` <dbl>,
## # `Peas (tonnes per hectare)` <dbl>, `Cassava (tonnes per hectare)` <dbl>,
## # `Barley (tonnes per hectare)` <dbl>,
## # `Cocoa beans (tonnes per hectare)` <dbl>, …
# Show the rows for Indonesia and Malaysia for the year 2015 only.
countries_of_interest <- c("Indonesia", "Malaysia")
data_filter <- df_crop |>
  filter(
    Entity %in% countries_of_interest,
    Year == 2015
  )
data_filter
## # A tibble: 2 × 14
## Entity Code Year `Wheat (tonnes per hectare)` `Rice (tonnes per hectare)`
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 Indonesia IDN 2015 NA 5.34
## 2 Malaysia MYS 2015 NA 4.02
## # ℹ 9 more variables: `Maize (tonnes per hectare)` <dbl>,
## # `Soybeans (tonnes per hectare)` <dbl>,
## # `Potatoes (tonnes per hectare)` <dbl>, `Beans (tonnes per hectare)` <dbl>,
## # `Peas (tonnes per hectare)` <dbl>, `Cassava (tonnes per hectare)` <dbl>,
## # `Barley (tonnes per hectare)` <dbl>,
## # `Cocoa beans (tonnes per hectare)` <dbl>,
## # `Bananas (tonnes per hectare)` <dbl>
# Which country had the lowest maize yield in 2020?
# The original `Year == 2020` filter matched no rows (the recorded result is an
# empty tibble), so the question was never answered. When the requested year
# has no maize values, fall back to the most recent year that does; if 2020
# data is present the result is unchanged.
# NOTE: the recorded output below predates this fix; re-run to refresh it.
target_year <- 2020

# Ascending sort: the smallest maize yield comes first, missing values last.
data_arrange <- arrange(df_crop, `Maize (tonnes per hectare)`)

# Years that have at least one non-missing maize value.
maize_years <- data_arrange$Year[
  !is.na(data_arrange$`Maize (tonnes per hectare)`)
]
if (length(maize_years) > 0 && !target_year %in% maize_years) {
  message(
    "No maize data for ", target_year,
    "; using ", max(maize_years), " instead."
  )
  target_year <- max(maize_years)
}

# The name is kept for compatibility; it holds the rows of `target_year`.
data_arrange_2020 <- filter(data_arrange, Year == target_year)
head(data_arrange_2020, 1)
## # A tibble: 0 × 14
## # ℹ 14 variables: Entity <chr>, Code <chr>, Year <dbl>,
## # Wheat (tonnes per hectare) <dbl>, Rice (tonnes per hectare) <dbl>,
## # Maize (tonnes per hectare) <dbl>, Soybeans (tonnes per hectare) <dbl>,
## # Potatoes (tonnes per hectare) <dbl>, Beans (tonnes per hectare) <dbl>,
## # Peas (tonnes per hectare) <dbl>, Cassava (tonnes per hectare) <dbl>,
## # Barley (tonnes per hectare) <dbl>, Cocoa beans (tonnes per hectare) <dbl>,
## # Bananas (tonnes per hectare) <dbl>
# Sort Indonesia's rows by potato yield, highest first.
data_filter <- df_crop |>
  filter(Entity == "Indonesia")
data_arrange <- data_filter |>
  arrange(desc(`Potatoes (tonnes per hectare)`))
data_arrange
## # A tibble: 58 × 14
## Entity Code Year `Wheat (tonnes per hectare)` Rice (tonnes per hectare…¹
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 Indonesia IDN 2018 NA 5.19
## 2 Indonesia IDN 2016 NA 5.24
## 3 Indonesia IDN 2015 NA 5.34
## 4 Indonesia IDN 2014 NA 5.13
## 5 Indonesia IDN 2006 NA 4.62
## 6 Indonesia IDN 2008 NA 4.89
## 7 Indonesia IDN 1995 NA 4.35
## 8 Indonesia IDN 2012 NA 5.14
## 9 Indonesia IDN 2009 NA 5.00
## 10 Indonesia IDN 2005 NA 4.57
## # ℹ 48 more rows
## # ℹ abbreviated name: ¹`Rice (tonnes per hectare)`
## # ℹ 9 more variables: `Maize (tonnes per hectare)` <dbl>,
## # `Soybeans (tonnes per hectare)` <dbl>,
## # `Potatoes (tonnes per hectare)` <dbl>, `Beans (tonnes per hectare)` <dbl>,
## # `Peas (tonnes per hectare)` <dbl>, `Cassava (tonnes per hectare)` <dbl>,
## # `Barley (tonnes per hectare)` <dbl>, …
# Add a Rice_Status column: "Tinggi" (high) when the rice yield is above
# 4 tonnes per hectare, otherwise "Rendah" (low). Rows whose rice yield is
# missing get NA because the comparison itself is NA.
data_mutate <- df_crop |>
  mutate(
    Rice_Status = if_else(
      `Rice (tonnes per hectare)` > 4,
      "Tinggi",
      "Rendah"
    )
  )
data_mutate
## # A tibble: 13,075 × 15
## Entity Code Year `Wheat (tonnes per hectare)` Rice (tonnes per hecta…¹
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 Afghanistan AFG 1961 1.02 1.52
## 2 Afghanistan AFG 1962 0.974 1.52
## 3 Afghanistan AFG 1963 0.832 1.52
## 4 Afghanistan AFG 1964 0.951 1.73
## 5 Afghanistan AFG 1965 0.972 1.73
## 6 Afghanistan AFG 1966 0.867 1.52
## 7 Afghanistan AFG 1967 1.12 1.92
## 8 Afghanistan AFG 1968 1.16 1.95
## 9 Afghanistan AFG 1969 1.19 1.98
## 10 Afghanistan AFG 1970 0.956 1.81
## # ℹ 13,065 more rows
## # ℹ abbreviated name: ¹`Rice (tonnes per hectare)`
## # ℹ 10 more variables: `Maize (tonnes per hectare)` <dbl>,
## # `Soybeans (tonnes per hectare)` <dbl>,
## # `Potatoes (tonnes per hectare)` <dbl>, `Beans (tonnes per hectare)` <dbl>,
## # `Peas (tonnes per hectare)` <dbl>, `Cassava (tonnes per hectare)` <dbl>,
## # `Barley (tonnes per hectare)` <dbl>, …
# What is Indonesia's average banana yield across all available years?
# Missing values are ignored (`na.rm = TRUE`). The summary column is named
# explicitly so the result does not carry the whole backtick-escaped expression
# as its header, matching the named `SD_Maize` summary used later on.
data_filter <- filter(df_crop, Entity == "Indonesia")
summarise(
  data_filter,
  Mean_Bananas = mean(`Bananas (tonnes per hectare)`, na.rm = TRUE)
)
## # A tibble: 1 × 1
## Mean_Bananas
## <dbl>
## 1 30.5
# Take the maize data from 2010 onwards, compute the standard deviation of the
# maize yield per entity (ignoring missing values), and list the entities from
# the largest standard deviation to the smallest.
data_filter <- df_crop |>
  filter(Year >= 2010)
data_group <- data_filter |>
  group_by(Entity)
data_sum <- data_group |>
  summarise(
    SD_Maize = sd(`Maize (tonnes per hectare)`, na.rm = TRUE)
  )
data_sum |>
  arrange(desc(SD_Maize))
## # A tibble: 242 × 2
## Entity SD_Maize
## <chr> <dbl>
## 1 Kuwait 9.24
## 2 United Arab Emirates 9.19
## 3 Jordan 7.03
## 4 Israel 4.80
## 5 Saint Vincent and the Grenadines 2.89
## 6 Qatar 2.74
## 7 French Guiana 2.50
## 8 New Caledonia 2.29
## 9 Slovakia 1.68
## 10 Oman 1.61
## # ℹ 232 more rows