# 1. Install and load necessary packages
library(viridis)
## Loading required package: viridisLite
library(maps)
##
## Attaching package: 'maps'
## The following object is masked from 'package:viridis':
##
## unemp
library(gapminder)
library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# 2. Make the gapminder data set available and print a compact summary of
#    its columns and their types
data(list = "gapminder")
str(object = gapminder)
## tibble [1,704 × 6] (S3: tbl_df/tbl/data.frame)
## $ country : Factor w/ 142 levels "Afghanistan",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ continent: Factor w/ 5 levels "Africa","Americas",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ year : int [1:1704] 1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
## $ lifeExp : num [1:1704] 28.8 30.3 32 34 36.1 ...
## $ pop : int [1:1704] 8425333 9240934 10267083 11537966 13079460 14880372 12881816 13867957 16317921 22227415 ...
## $ gdpPercap: num [1:1704] 779 821 853 836 740 ...
# 3. Keep only the gapminder rows whose continent is Asia
filter(gapminder, continent == "Asia")
## # A tibble: 396 × 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Afghanistan Asia 1952 28.8 8425333 779.
## 2 Afghanistan Asia 1957 30.3 9240934 821.
## 3 Afghanistan Asia 1962 32.0 10267083 853.
## 4 Afghanistan Asia 1967 34.0 11537966 836.
## 5 Afghanistan Asia 1972 36.1 13079460 740.
## 6 Afghanistan Asia 1977 38.4 14880372 786.
## 7 Afghanistan Asia 1982 39.9 12881816 978.
## 8 Afghanistan Asia 1987 40.8 13867957 852.
## 9 Afghanistan Asia 1992 41.7 16317921 649.
## 10 Afghanistan Asia 1997 41.8 22227415 635.
## # ℹ 386 more rows
# 4. Keep only the gapminder rows recorded for the year 1957
filter(gapminder, year == 1957)
## # A tibble: 142 × 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Afghanistan Asia 1957 30.3 9240934 821.
## 2 Albania Europe 1957 59.3 1476505 1942.
## 3 Algeria Africa 1957 45.7 10270856 3014.
## 4 Angola Africa 1957 32.0 4561361 3828.
## 5 Argentina Americas 1957 64.4 19610538 6857.
## 6 Australia Oceania 1957 70.3 9712569 10950.
## 7 Austria Europe 1957 67.5 6965860 8843.
## 8 Bahrain Asia 1957 53.8 138655 11636.
## 9 Bangladesh Asia 1957 39.3 51365468 662.
## 10 Belgium Europe 1957 69.2 8989111 9715.
## # ℹ 132 more rows
# 5. Keep the single gapminder row for China in 2002
#    (conditions passed to one filter() call are combined with AND)
gapminder %>%
  filter(year == 2002, country == "China")
## # A tibble: 1 × 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 China Asia 2002 72.0 1280400000 3119.
# 6. Sort gapminder by lifeExp, first ascending and then descending
# Ascending: lowest life expectancy first
arrange(gapminder, lifeExp)
## # A tibble: 1,704 × 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Rwanda Africa 1992 23.6 7290203 737.
## 2 Afghanistan Asia 1952 28.8 8425333 779.
## 3 Gambia Africa 1952 30 284320 485.
## 4 Angola Africa 1952 30.0 4232095 3521.
## 5 Sierra Leone Africa 1952 30.3 2143249 880.
## 6 Afghanistan Asia 1957 30.3 9240934 821.
## 7 Cambodia Asia 1977 31.2 6978607 525.
## 8 Mozambique Africa 1952 31.3 6446316 469.
## 9 Sierra Leone Africa 1957 31.6 2295678 1004.
## 10 Burkina Faso Africa 1952 32.0 4469979 543.
## # ℹ 1,694 more rows
# Descending: highest life expectancy first
arrange(gapminder, desc(lifeExp))
## # A tibble: 1,704 × 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Japan Asia 2007 82.6 127467972 31656.
## 2 Hong Kong, China Asia 2007 82.2 6980412 39725.
## 3 Japan Asia 2002 82 127065841 28605.
## 4 Iceland Europe 2007 81.8 301931 36181.
## 5 Switzerland Europe 2007 81.7 7554661 37506.
## 6 Hong Kong, China Asia 2002 81.5 6762476 30209.
## 7 Australia Oceania 2007 81.2 20434176 34435.
## 8 Spain Europe 2007 80.9 40448191 28821.
## 9 Sweden Europe 2007 80.9 9031088 33860.
## 10 Israel Asia 2007 80.7 6426679 25523.
## # ℹ 1,694 more rows
# 7. Take the 1957 observations and order them from the largest population
#    to the smallest
gapminder %>%
  filter(year == 1957) %>%
  arrange(desc(pop))
## # A tibble: 142 × 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 China Asia 1957 50.5 637408000 576.
## 2 India Asia 1957 40.2 409000000 590.
## 3 United States Americas 1957 69.5 171984000 14847.
## 4 Japan Asia 1957 65.5 91563009 4318.
## 5 Indonesia Asia 1957 39.9 90124000 859.
## 6 Germany Europe 1957 69.1 71019069 10188.
## 7 Brazil Americas 1957 53.3 65551171 2487.
## 8 United Kingdom Europe 1957 70.4 51430000 11283.
## 9 Bangladesh Asia 1957 39.3 51365468 662.
## 10 Italy Europe 1957 67.8 49182000 6249.
## # ℹ 132 more rows
# 8. Add a new column lifeExpMonths holding life expectancy in months
mutate(gapminder, lifeExpMonths = 12 * lifeExp)
## # A tibble: 1,704 × 7
## country continent year lifeExp pop gdpPercap lifeExpMonths
## <fct> <fct> <int> <dbl> <int> <dbl> <dbl>
## 1 Afghanistan Asia 1952 28.8 8425333 779. 346.
## 2 Afghanistan Asia 1957 30.3 9240934 821. 364.
## 3 Afghanistan Asia 1962 32.0 10267083 853. 384.
## 4 Afghanistan Asia 1967 34.0 11537966 836. 408.
## 5 Afghanistan Asia 1972 36.1 13079460 740. 433.
## 6 Afghanistan Asia 1977 38.4 14880372 786. 461.
## 7 Afghanistan Asia 1982 39.9 12881816 978. 478.
## 8 Afghanistan Asia 1987 40.8 13867957 852. 490.
## 9 Afghanistan Asia 1992 41.7 16317921 649. 500.
## 10 Afghanistan Asia 1997 41.8 22227415 635. 501.
## # ℹ 1,694 more rows
# 9. Convert lifeExp from years to months by overwriting the column in the
#    printed result (the result is not assigned, so gapminder is unchanged)
gapminder %>%
  mutate(lifeExp = 12 * lifeExp)
## # A tibble: 1,704 × 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Afghanistan Asia 1952 346. 8425333 779.
## 2 Afghanistan Asia 1957 364. 9240934 821.
## 3 Afghanistan Asia 1962 384. 10267083 853.
## 4 Afghanistan Asia 1967 408. 11537966 836.
## 5 Afghanistan Asia 1972 433. 13079460 740.
## 6 Afghanistan Asia 1977 461. 14880372 786.
## 7 Afghanistan Asia 1982 478. 12881816 978.
## 8 Afghanistan Asia 1987 490. 13867957 852.
## 9 Afghanistan Asia 1992 500. 16317921 649.
## 10 Afghanistan Asia 1997 501. 22227415 635.
## # ℹ 1,694 more rows
# 10. For the year 2007, add lifeExpMonths and sort by it in descending order
#     (the new column is computed row by row, so filtering first gives the
#     same rows and values)
gapminder %>%
  filter(year == 2007) %>%
  mutate(lifeExpMonths = lifeExp * 12) %>%
  arrange(desc(lifeExpMonths))
## # A tibble: 142 × 7
## country continent year lifeExp pop gdpPercap lifeExpMonths
## <fct> <fct> <int> <dbl> <int> <dbl> <dbl>
## 1 Japan Asia 2007 82.6 127467972 31656. 991.
## 2 Hong Kong, China Asia 2007 82.2 6980412 39725. 986.
## 3 Iceland Europe 2007 81.8 301931 36181. 981.
## 4 Switzerland Europe 2007 81.7 7554661 37506. 980.
## 5 Australia Oceania 2007 81.2 20434176 34435. 975.
## 6 Spain Europe 2007 80.9 40448191 28821. 971.
## 7 Sweden Europe 2007 80.9 9031088 33860. 971.
## 8 Israel Asia 2007 80.7 6426679 25523. 969.
## 9 France Europe 2007 80.7 61083916 30470. 968.
## 10 Canada Americas 2007 80.7 33390141 36319. 968.
## # ℹ 132 more rows
# 11. Store the 1952 observations in gapminder_1952 and print them
gapminder_1952 <- filter(gapminder, year == 1952)
gapminder_1952
## # A tibble: 142 × 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Afghanistan Asia 1952 28.8 8425333 779.
## 2 Albania Europe 1952 55.2 1282697 1601.
## 3 Algeria Africa 1952 43.1 9279525 2449.
## 4 Angola Africa 1952 30.0 4232095 3521.
## 5 Argentina Americas 1952 62.5 17876956 5911.
## 6 Australia Oceania 1952 69.1 8691212 10040.
## 7 Austria Europe 1952 66.8 6927772 6137.
## 8 Bahrain Asia 1952 50.9 120447 9867.
## 9 Bangladesh Asia 1952 37.5 46886859 684.
## 10 Belgium Europe 1952 68 8730405 8343.
## # ℹ 132 more rows
# 12. Visualize gapminder_1952 with base plot() and with ggplot2
# plot() on this data frame draws a scatterplot matrix of all its columns.
plot(gapminder_1952, main = "Gapminder_1952_21MIC0065")

# qplot() was deprecated in ggplot2 3.4.0, so the quick scatter plot is written
# as ggplot() + geom_point(), which draws the same points, axes and title.
# Any two columns from gapminder_1952 can be used for x and y.
ggplot(gapminder_1952, aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  labs(title = "GDPvsLifeExp_21MIC0065")

# 13. Using ggplot for the data gapminder_1952, visualize a boxplot of
#     gdpPercap against pop.
#     pop is continuous, so mapping it straight to x makes geom_boxplot() draw
#     a single box for all rows and warn "Continuous x aesthetic". The
#     countries are therefore split into four equal-sized population groups
#     with ntile(), and one box is drawn per group.
gapminder_1952 %>%
  mutate(
    pop_quartile = factor(
      ntile(pop, 4),
      levels = 1:4,
      labels = c("Q1 (smallest)", "Q2", "Q3", "Q4 (largest)")
    )
  ) %>%
  ggplot(aes(x = pop_quartile, y = gdpPercap)) +
  geom_boxplot() +
  xlab("Population quartile") +
  ylab("GDPperCapita") +
  labs(title = "Boxplot_popvsGDP_21MIC0065")

# 14. Scatter plot of pop vs gdpPercap for gapminder_1952 with both axes on a
#     log10 scale (the y-axis label typo "GDPperCaptia" is corrected)
ggplot(gapminder_1952, aes(x = pop, y = gdpPercap)) +
  geom_point() +
  scale_x_log10() +
  scale_y_log10() +
  xlab("Population") +
  ylab("GDPperCapita") +
  labs(title = "Scatterplot_popvsGDP_21MIC0065")

# 15. Scatter plot of pop vs lifeExp for gapminder_1952; point colour encodes
#     the continent and point size encodes gdpPercap
#     (the y-axis label typo "Life Expentancy" is corrected)
ggplot(
  gapminder_1952,
  aes(x = pop, y = lifeExp, color = continent, size = gdpPercap)
) +
  geom_point() +
  xlab("Population") +
  ylab("Life Expectancy") +
  labs(title = "Scatterplot_popvsLifeExp_21MIC0065")

# 16. Scatter plot of pop vs lifeExp for gapminder_1952, one panel per
#     continent. The title previously said "popvsGDP" although y is lifeExp;
#     title and y-axis label now match the plotted variable.
ggplot(gapminder_1952, aes(x = pop, y = lifeExp)) +
  geom_point() +
  facet_wrap(~ continent) +
  xlab("Population") +
  ylab("Life Expectancy") +
  labs(title = "Scatterplot_popvsLifeExp_21MIC0065")

# 17. Scatter plot of pop vs lifeExp for the full gapminder data, one panel
#     per year. The title previously said "popvsGDP" although y is lifeExp;
#     title and y-axis label now match the plotted variable.
ggplot(gapminder, aes(x = pop, y = lifeExp)) +
  geom_point() +
  facet_wrap(~ year) +
  xlab("Population") +
  ylab("Life Expectancy") +
  labs(title = "Scatterplot_popvsLifeExp_21MIC0065")

# 18. Mean and median of lifeExp across all gapminder rows
summarise(
  gapminder,
  mean_lifeExp = mean(lifeExp),
  median_lifeExp = median(lifeExp)
)
## # A tibble: 1 × 2
## mean_lifeExp median_lifeExp
## <dbl> <dbl>
## 1 59.5 60.7
# 19. For the 1957 observations: median lifeExp and maximum gdpPercap
gapminder_1957 <- filter(gapminder, year == 1957)
summarise(
  gapminder_1957,
  median_lifeExp = median(lifeExp),
  max_gdpPercap = max(gdpPercap)
)
## # A tibble: 1 × 2
## median_lifeExp max_gdpPercap
## <dbl> <dbl>
## 1 48.4 113523.
# 20. For gapminder grouped by continent and year, summarize the median
#     lifeExp and the maximum gdpPercap.
#     .groups = "drop" returns an ungrouped tibble; without it summarise()
#     keeps the result grouped by continent and prints a message about it.
gapminder %>%
  group_by(continent, year) %>%
  summarise(
    median_lifeExp = median(lifeExp),
    max_gdpPercap = max(gdpPercap),
    .groups = "drop"
  )
## # A tibble: 60 × 4
## continent year median_lifeExp max_gdpPercap
## <fct> <int> <dbl> <dbl>
## 1 Africa 1952 38.8 4725.
## 2 Africa 1957 40.6 5487.
## 3 Africa 1962 42.6 6757.
## 4 Africa 1967 44.7 18773.
## 5 Africa 1972 47.0 21011.
## 6 Africa 1977 49.3 21951.
## 7 Africa 1982 50.8 17364.
## 8 Africa 1987 51.6 11864.
## 9 Africa 1992 52.4 13522.
## 10 Africa 1997 52.8 14723.
## # ℹ 50 more rows
# 21. Build by_year: one row per year holding the median lifeExp, then print it
by_year <- summarise(
  group_by(gapminder, year),
  median_lifeExp = median(lifeExp)
)
by_year
## # A tibble: 12 × 2
## year median_lifeExp
## <int> <dbl>
## 1 1952 45.1
## 2 1957 48.4
## 3 1962 50.9
## 4 1967 53.8
## 5 1972 56.5
## 6 1977 59.7
## 7 1982 62.4
## 8 1987 65.8
## 9 1992 67.7
## 10 1997 69.4
## 11 2002 70.8
## 12 2007 71.9
# 22. Scatter plot of year vs median lifeExp from by_year; the lower y limit
#     is fixed at 0 and the upper limit (NA) is taken from the data
ggplot(by_year, aes(x = year, y = median_lifeExp)) +
  geom_point() +
  ylim(0, NA) +
  labs(
    x = "Year",
    y = "Median_LifeExp",
    title = "Scatterplot_21MIC0065"
  )

# 23. Build by_continent from the 1952 data: one row per continent holding the
#     median gdpPercap, then print it
by_continent <- summarise(
  group_by(gapminder_1952, continent),
  median_gdpPercap = median(gdpPercap)
)
by_continent
## # A tibble: 5 × 2
## continent median_gdpPercap
## <fct> <dbl>
## 1 Africa 987.
## 2 Americas 3048.
## 3 Asia 1207.
## 4 Europe 5142.
## 5 Oceania 10298.
# 24. Column plot of by_continent: one bar per continent whose height is the
#     median gdpPercap
ggplot(by_continent, aes(x = continent, y = median_gdpPercap)) +
  geom_col() +
  labs(
    x = "Continent",
    y = "MedianGdpPercap",
    title = "Colplot_21MIC0065"
  )

# 25. Rebuild gapminder_1952 and draw a base-graphics histogram of its pop
#     column
gapminder_1952 <- filter(gapminder, year == 1952)
hist(
  gapminder_1952$pop,
  main = "Histogram_PopvsFrequency_21MIC0065",
  xlab = "Population",
  ylab = "Frequency"
)
