# Praktik I Komputasi Statistika I
#install.packages("tidyverse")
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.3
## Warning: package 'ggplot2' was built under R version 4.5.3
## Warning: package 'tidyr' was built under R version 4.5.3
## Warning: package 'readr' was built under R version 4.5.3
## Warning: package 'purrr' was built under R version 4.5.3
## Warning: package 'dplyr' was built under R version 4.5.3
## Warning: package 'stringr' was built under R version 4.5.3
## Warning: package 'forcats' was built under R version 4.5.3
## Warning: package 'lubridate' was built under R version 4.5.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.1 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.3 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Location of the raw CSV file that is loaded below.
url <- "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-01/key_crop_yields.csv"

# Download and parse the CSV into a tibble; column types are guessed by readr.
df_crop <- read_csv(file = url)
## Rows: 13075 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Entity, Code
## dbl (12): Year, Wheat (tonnes per hectare), Rice (tonnes per hectare), Maize...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep only the identifying columns plus the potato and cassava yields.
data_select <- df_crop %>%
  select(
    Entity,
    Year,
    `Potatoes (tonnes per hectare)`,
    `Cassava (tonnes per hectare)`
  )
data_select
## # A tibble: 13,075 × 4
## Entity Year `Potatoes (tonnes per hectare)` Cassava (tonnes per hecta…¹
## <chr> <dbl> <dbl> <dbl>
## 1 Afghanistan 1961 8.67 NA
## 2 Afghanistan 1962 7.67 NA
## 3 Afghanistan 1963 8.13 NA
## 4 Afghanistan 1964 8.6 NA
## 5 Afghanistan 1965 8.8 NA
## 6 Afghanistan 1966 9.07 NA
## 7 Afghanistan 1967 9.8 NA
## 8 Afghanistan 1968 10 NA
## 9 Afghanistan 1969 10.2 NA
## 10 Afghanistan 1970 9.54 NA
## # ℹ 13,065 more rows
## # ℹ abbreviated name: ¹`Cassava (tonnes per hectare)`
# Show every column except the soybean, bean and pea yields (result is only
# printed, not stored).
df_crop %>%
  select(
    !c(
      `Soybeans (tonnes per hectare)`,
      `Beans (tonnes per hectare)`,
      `Peas (tonnes per hectare)`
    )
  )
## # A tibble: 13,075 × 11
## Entity Code Year `Wheat (tonnes per hectare)` Rice (tonnes per hecta…¹
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 Afghanistan AFG 1961 1.02 1.52
## 2 Afghanistan AFG 1962 0.974 1.52
## 3 Afghanistan AFG 1963 0.832 1.52
## 4 Afghanistan AFG 1964 0.951 1.73
## 5 Afghanistan AFG 1965 0.972 1.73
## 6 Afghanistan AFG 1966 0.867 1.52
## 7 Afghanistan AFG 1967 1.12 1.92
## 8 Afghanistan AFG 1968 1.16 1.95
## 9 Afghanistan AFG 1969 1.19 1.98
## 10 Afghanistan AFG 1970 0.956 1.81
## # ℹ 13,065 more rows
## # ℹ abbreviated name: ¹`Rice (tonnes per hectare)`
## # ℹ 6 more variables: `Maize (tonnes per hectare)` <dbl>,
## # `Potatoes (tonnes per hectare)` <dbl>,
## # `Cassava (tonnes per hectare)` <dbl>, `Barley (tonnes per hectare)` <dbl>,
## # `Cocoa beans (tonnes per hectare)` <dbl>,
## # `Bananas (tonnes per hectare)` <dbl>
# Rows with country code IDN whose rice yield is below 2 tonnes per hectare.
data_padi <- df_crop %>%
  filter(Code == "IDN" & `Rice (tonnes per hectare)` < 2)

# Print just the columns relevant to the rice comparison.
data_padi %>%
  select(Entity, Year, `Rice (tonnes per hectare)`)
## # A tibble: 7 × 3
## Entity Year `Rice (tonnes per hectare)`
## <chr> <dbl> <dbl>
## 1 Indonesia 1961 1.76
## 2 Indonesia 1962 1.79
## 3 Indonesia 1963 1.72
## 4 Indonesia 1964 1.76
## 5 Indonesia 1965 1.77
## 6 Indonesia 1966 1.77
## 7 Indonesia 1967 1.76
# Rows after the year 2000 whose wheat yield is below 5 tonnes per hectare.
data_gandum <- df_crop %>%
  filter(Year > 2000 & `Wheat (tonnes per hectare)` < 5)

# Print only the year and the wheat yield of the matching rows.
data_gandum %>%
  select(Year, `Wheat (tonnes per hectare)`)
## # A tibble: 2,361 × 2
## Year `Wheat (tonnes per hectare)`
## <dbl> <dbl>
## 1 2001 0.898
## 2 2002 1.54
## 3 2003 1.5
## 4 2004 1.27
## 5 2005 1.82
## 6 2006 1.38
## 7 2007 1.82
## 8 2008 1.23
## 9 2009 1.97
## 10 2010 1.93
## # ℹ 2,351 more rows
# Rows for Indonesia and Malaysia in the year 2015, all columns kept.
data_2015 <- df_crop %>%
  filter(
    Entity %in% c("Indonesia", "Malaysia"),
    Year == 2015
  )
data_2015
## # A tibble: 2 × 14
## Entity Code Year `Wheat (tonnes per hectare)` `Rice (tonnes per hectare)`
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 Indonesia IDN 2015 NA 5.34
## 2 Malaysia MYS 2015 NA 4.02
## # ℹ 9 more variables: `Maize (tonnes per hectare)` <dbl>,
## # `Soybeans (tonnes per hectare)` <dbl>,
## # `Potatoes (tonnes per hectare)` <dbl>, `Beans (tonnes per hectare)` <dbl>,
## # `Peas (tonnes per hectare)` <dbl>, `Cassava (tonnes per hectare)` <dbl>,
## # `Barley (tonnes per hectare)` <dbl>,
## # `Cocoa beans (tonnes per hectare)` <dbl>,
## # `Bananas (tonnes per hectare)` <dbl>
# Minimum maize yield among the rows recorded for the year 2020.
#
# The previously recorded run of this step warned "no non-missing arguments to
# min; returning Inf": after filtering on Year == 2020 there is no usable maize
# value (presumably the data ends before 2020 -- confirm with
# range(df_crop$Year)). min() over an empty vector returns Inf, which reads
# like a real yield, so missing values are dropped up front and an empty
# selection is reported as NA instead.
data_maize <- df_crop %>%
  filter(Year == 2020, !is.na(`Maize (tonnes per hectare)`)) %>%
  summarise(
    `Minimum Maize` = if (n() > 0) {
      min(`Maize (tonnes per hectare)`)
    } else {
      NA_real_  # no observation available for the requested year
    }
  )
data_maize
## # A tibble: 1 × 1
## `Minimum Maize`
## <dbl>
## 1 NA
# Potato yields of Indonesia, ordered from the highest yield to the lowest.
data_idn <- df_crop %>%
  filter(Entity == "Indonesia") %>%
  select(Entity, Code, Year, `Potatoes (tonnes per hectare)`) %>%
  arrange(desc(`Potatoes (tonnes per hectare)`))
data_idn
## # A tibble: 58 × 4
## Entity Code Year `Potatoes (tonnes per hectare)`
## <chr> <chr> <dbl> <dbl>
## 1 Indonesia IDN 2018 18.7
## 2 Indonesia IDN 2016 18.3
## 3 Indonesia IDN 2015 18.2
## 4 Indonesia IDN 2014 17.7
## 5 Indonesia IDN 2006 16.9
## 6 Indonesia IDN 2008 16.7
## 7 Indonesia IDN 1995 16.6
## 8 Indonesia IDN 2012 16.6
## 9 Indonesia IDN 2009 16.5
## 10 Indonesia IDN 2005 16.4
## # ℹ 48 more rows
# Label every row by its rice yield: "Tinggi" when above 4 tonnes per hectare,
# "Rendah" otherwise; a missing yield gives a missing label.
data_rice <- df_crop %>%
  select(Entity, `Rice (tonnes per hectare)`) %>%
  mutate(
    Rice_Status = ifelse(
      `Rice (tonnes per hectare)` > 4,
      "Tinggi",
      "Rendah"
    )
  )
data_rice
## # A tibble: 13,075 × 3
## Entity `Rice (tonnes per hectare)` Rice_Status
## <chr> <dbl> <chr>
## 1 Afghanistan 1.52 Rendah
## 2 Afghanistan 1.52 Rendah
## 3 Afghanistan 1.52 Rendah
## 4 Afghanistan 1.73 Rendah
## 5 Afghanistan 1.73 Rendah
## 6 Afghanistan 1.52 Rendah
## 7 Afghanistan 1.92 Rendah
## 8 Afghanistan 1.95 Rendah
## 9 Afghanistan 1.98 Rendah
## 10 Afghanistan 1.81 Rendah
## # ℹ 13,065 more rows
# Mean banana yield over all Indonesia rows, ignoring missing values.
data_bananas <- df_crop %>%
  filter(Entity == "Indonesia") %>%
  summarise(
    `Mean Bananas` = mean(
      `Bananas (tonnes per hectare)`,
      na.rm = TRUE
    )
  )
data_bananas
## # A tibble: 1 × 1
## `Mean Bananas`
## <dbl>
## 1 30.5
# Standard deviation of the maize yield per entity for the years 2010 onwards,
# sorted from the most variable entity to the least variable one.
datasd_maize <- df_crop %>%
  filter(Year >= 2010) %>%
  group_by(Entity) %>%
  summarise(
    `Simpangan Baku Maize` = sd(
      `Maize (tonnes per hectare)`,
      na.rm = TRUE
    ),
    .groups = "drop"  # same as the default here: one grouping variable
  ) %>%
  arrange(desc(`Simpangan Baku Maize`))
datasd_maize
## # A tibble: 242 × 2
## Entity `Simpangan Baku Maize`
## <chr> <dbl>
## 1 Kuwait 9.24
## 2 United Arab Emirates 9.19
## 3 Jordan 7.03
## 4 Israel 4.80
## 5 Saint Vincent and the Grenadines 2.89
## 6 Qatar 2.74
## 7 French Guiana 2.50
## 8 New Caledonia 2.29
## 9 Slovakia 1.68
## 10 Oman 1.61
## # ℹ 232 more rows