Be sure to run
# Attach every package used in this document in a single call.
pacman::p_load(
  tidyverse,
  gapminder,
  palmerpenguins,
  janitor,
  gtsummary,
  visdat
)

# Dimensions (rows, columns) of the penguins data.
dim(penguins)
[1] 344 8
# Per-column summary of the penguins data.
penguins %>% summary()

# Graphical overview of the same data frame.
penguins %>% visdat::vis_dat()
bill_length_mm vs bill_depth_mm
# Scatter plot of bill depth against bill length, coloured by sex.
penguins %>%
  ggplot(aes(x = bill_length_mm, y = bill_depth_mm, color = sex)) +
  geom_point()
# Same scatter plot, restricted to rows without missing values.
penguins %>%
  drop_na() %>% # drop every row that contains an NA in any column
  ggplot(aes(x = bill_length_mm, y = bill_depth_mm, color = sex)) +
  geom_point()
Make a histogram of body_mass_g
body_mass_g by species
Asia and 1952
1 United States Dollar equals 0.84 Euro
# Mean GDP per capita in euros for each continent and year.
# Conversion rate used: 1 USD = 0.84 EUR.
gapminder %>%
  group_by(continent, year) %>%
  summarise(meanGdpperCapEur = mean(gdpPercap * 0.84))
`summarise()` regrouping output by 'continent' (override with `.groups` argument)
# Line plot of mean GDP per capita in euros over time, one line per
# continent, on a log10 y-axis. Conversion rate used: 1 USD = 0.84 EUR.
gapminder %>%
  mutate(gdp_in_euros = gdpPercap * 0.84) %>%
  group_by(continent, year) %>%
  summarise(meanGdpperCapEur = mean(gdp_in_euros)) %>%
  ggplot(aes(x = year, y = meanGdpperCapEur, color = continent)) +
  geom_line() +
  scale_y_log10()
`summarise()` regrouping output by 'continent' (override with `.groups` argument)
| Characteristic | Adelie, N = 146¹ | Chinstrap, N = 68¹ | Gentoo, N = 119¹ |
|---|---|---|---|
| body_mass_g | 3,700 (3,362, 4,000) | 3,700 (3,488, 3,950) | 5,050 (4,700, 5,500) |
| bill_length_mm | 38.8 (36.7, 40.8) | 49.5 (46.3, 51.1) | 47.4 (45.3, 49.6) |
| island | | | |
| Biscoe | 44 (30%) | 0 (0%) | 119 (100%) |
| Dream | 55 (38%) | 68 (100%) | 0 (0%) |
| Torgersen | 47 (32%) | 0 (0%) | 0 (0%) |

¹ Statistics presented: Median (IQR); n (%)
# Read the wide-format UNICEF table straight from its URL.
unicef <- readr::read_csv(file = "https://bit.ly/unicef-wide")
# Mean life expectancy for every continent-year combination (long format).
gapminder %>%
  group_by(continent, year) %>%
  summarise(LifeExpMean = mean(lifeExp))
`summarise()` regrouping output by 'continent' (override with `.groups` argument)
# Same summary, reshaped so that each year becomes its own column
# and each continent is a single row.
gapminder %>%
  group_by(continent, year) %>%
  summarise(LifeExpMean = mean(lifeExp)) %>%
  pivot_wider(names_from = year, values_from = LifeExpMean)
`summarise()` regrouping output by 'continent' (override with `.groups` argument)
# Read the country / continent code lookup table from datahub.io.
countries_list <- readr::read_csv(
  file = "https://datahub.io/JohnSnowLabs/country-and-continent-codes-list/r/country-and-continent-codes-list-csv.csv"
)
── Column specification ─────────────────────────────────────────────────────────────────
cols(
Continent_Name = col_character(),
Continent_Code = col_character(),
Country_Name = col_character(),
Two_Letter_Country_Code = col_character(),
Three_Letter_Country_Code = col_character(),
Country_Number = col_double()
)
# List the column names of the unicef table.
colnames(unicef)
[1] "country_name" "u5mr_1950" "u5mr_1951"
[4] "u5mr_1952" "u5mr_1953" "u5mr_1954"
[7] "u5mr_1955" "u5mr_1956" "u5mr_1957"
[10] "u5mr_1958" "u5mr_1959" "u5mr_1960"
[13] "u5mr_1961" "u5mr_1962" "u5mr_1963"
[16] "u5mr_1964" "u5mr_1965" "u5mr_1966"
[19] "u5mr_1967" "u5mr_1968" "u5mr_1969"
[22] "u5mr_1970" "u5mr_1971" "u5mr_1972"
[25] "u5mr_1973" "u5mr_1974" "u5mr_1975"
[28] "u5mr_1976" "u5mr_1977" "u5mr_1978"
[31] "u5mr_1979" "u5mr_1980" "u5mr_1981"
[34] "u5mr_1982" "u5mr_1983" "u5mr_1984"
[37] "u5mr_1985" "u5mr_1986" "u5mr_1987"
[40] "u5mr_1988" "u5mr_1989" "u5mr_1990"
[43] "u5mr_1991" "u5mr_1992" "u5mr_1993"
[46] "u5mr_1994" "u5mr_1995" "u5mr_1996"
[49] "u5mr_1997" "u5mr_1998" "u5mr_1999"
[52] "u5mr_2000" "u5mr_2001" "u5mr_2002"
[55] "u5mr_2003" "u5mr_2004" "u5mr_2005"
[58] "u5mr_2006" "u5mr_2007" "u5mr_2008"
[61] "u5mr_2009" "u5mr_2010" "u5mr_2011"
[64] "u5mr_2012" "u5mr_2013" "u5mr_2014"
[67] "u5mr_2015" "Code" "Continent_Name"
[70] "Continent_Code" "Country_Name" "Three_Letter_Country_Code"
[73] "Country_Number"
Calculate the mean mortality rate by continent and year.
What is the trend in the mortality rate by continent?
# Mean mortality rate per continent and year, drawn as a scatter plot.
#
# NOTE(review): the names(unicef) listing shows `Continent_Name`, while this
# pipeline uses `continent_name`; presumably the column names were cleaned
# (e.g. with janitor::clean_names()) before this step -- confirm.
unicef %>%
  # Reshape from wide to long: one row per country and year. `names_prefix`
  # strips the leading "u5mr_" so only the four-digit year remains.
  pivot_longer(
    u5mr_1950:u5mr_2015,
    names_to = "year",
    names_prefix = "u5mr_",
    values_to = "value"
  ) %>%
  # The year is still text at this point; convert it so the x-axis is a
  # continuous time axis rather than 66 discrete labels.
  mutate(year = as.integer(year)) %>%
  # Keep only the columns needed for the summary.
  select(continent_name, year, value) %>%
  group_by(continent_name, year) %>%
  # na.rm = TRUE: a single missing value would otherwise turn the whole
  # continent-year mean into NA.
  summarise(meanMrtRate5y = mean(value, na.rm = TRUE)) %>%
  ggplot(aes(x = year, y = meanMrtRate5y, color = continent_name)) +
  geom_point()
`summarise()` regrouping output by 'continent_name' (override with `.groups` argument)