#Name: Tomin Tom
#Reg. No:21MIC0066
#1. Install and load the necessary packages tidyr, dplyr, ggplot2
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)

#2. Load the gapminder dataset and look at its structure
library(gapminder)
# str() prints a compact summary: the object's class and dimensions, followed
# by the type and first few values of every column.
str(gapminder)
## tibble [1,704 × 6] (S3: tbl_df/tbl/data.frame)
##  $ country  : Factor w/ 142 levels "Afghanistan",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ continent: Factor w/ 5 levels "Africa","Americas",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ year     : int [1:1704] 1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
##  $ lifeExp  : num [1:1704] 28.8 30.3 32 34 36.1 ...
##  $ pop      : int [1:1704] 8425333 9240934 10267083 11537966 13079460 14880372 12881816 13867957 16317921 22227415 ...
##  $ gdpPercap: num [1:1704] 779 821 853 836 740 ...
#3. Extract the gapminder dataset where the continent is Asia
# Keep only the rows whose continent equals "Asia"; the result is printed,
# not stored.
filter(gapminder, continent == "Asia")
## # A tibble: 396 × 6
##    country     continent  year lifeExp      pop gdpPercap
##    <fct>       <fct>     <int>   <dbl>    <int>     <dbl>
##  1 Afghanistan Asia       1952    28.8  8425333      779.
##  2 Afghanistan Asia       1957    30.3  9240934      821.
##  3 Afghanistan Asia       1962    32.0 10267083      853.
##  4 Afghanistan Asia       1967    34.0 11537966      836.
##  5 Afghanistan Asia       1972    36.1 13079460      740.
##  6 Afghanistan Asia       1977    38.4 14880372      786.
##  7 Afghanistan Asia       1982    39.9 12881816      978.
##  8 Afghanistan Asia       1987    40.8 13867957      852.
##  9 Afghanistan Asia       1992    41.7 16317921      649.
## 10 Afghanistan Asia       1997    41.8 22227415      635.
## # ℹ 386 more rows
#4. Extract the gapminder dataset where the year is 1957
# One row per country remains: the observation recorded for 1957.
filter(gapminder, year == 1957)
## # A tibble: 142 × 6
##    country     continent  year lifeExp      pop gdpPercap
##    <fct>       <fct>     <int>   <dbl>    <int>     <dbl>
##  1 Afghanistan Asia       1957    30.3  9240934      821.
##  2 Albania     Europe     1957    59.3  1476505     1942.
##  3 Algeria     Africa     1957    45.7 10270856     3014.
##  4 Angola      Africa     1957    32.0  4561361     3828.
##  5 Argentina   Americas   1957    64.4 19610538     6857.
##  6 Australia   Oceania    1957    70.3  9712569    10950.
##  7 Austria     Europe     1957    67.5  6965860     8843.
##  8 Bahrain     Asia       1957    53.8   138655    11636.
##  9 Bangladesh  Asia       1957    39.3 51365468      662.
## 10 Belgium     Europe     1957    69.2  8989111     9715.
## # ℹ 132 more rows
#5. Extract the gapminder dataset where year 2002 and country is China
# Conditions passed to filter() as separate arguments are combined with AND.
gapminder %>%
  filter(
    year == 2002,
    country == "China"
  )
## # A tibble: 1 × 6
##   country continent  year lifeExp        pop gdpPercap
##   <fct>   <fct>     <int>   <dbl>      <int>     <dbl>
## 1 China   Asia       2002    72.0 1280400000     3119.
#6. Load and Sort lifeExp in ascending and descending order
# Two sorted copies of the full dataset: shortest life expectancy first, and
# longest life expectancy first.
lifeExpasc <- arrange(gapminder, lifeExp)
lifeExpdesc <- arrange(gapminder, desc(lifeExp))
# Show the ascending copy.
lifeExpasc
## # A tibble: 1,704 × 6
##    country      continent  year lifeExp     pop gdpPercap
##    <fct>        <fct>     <int>   <dbl>   <int>     <dbl>
##  1 Rwanda       Africa     1992    23.6 7290203      737.
##  2 Afghanistan  Asia       1952    28.8 8425333      779.
##  3 Gambia       Africa     1952    30    284320      485.
##  4 Angola       Africa     1952    30.0 4232095     3521.
##  5 Sierra Leone Africa     1952    30.3 2143249      880.
##  6 Afghanistan  Asia       1957    30.3 9240934      821.
##  7 Cambodia     Asia       1977    31.2 6978607      525.
##  8 Mozambique   Africa     1952    31.3 6446316      469.
##  9 Sierra Leone Africa     1957    31.6 2295678     1004.
## 10 Burkina Faso Africa     1952    32.0 4469979      543.
## # ℹ 1,694 more rows
# Show the copy sorted from the highest to the lowest life expectancy.
lifeExpdesc
## # A tibble: 1,704 × 6
##    country          continent  year lifeExp       pop gdpPercap
##    <fct>            <fct>     <int>   <dbl>     <int>     <dbl>
##  1 Japan            Asia       2007    82.6 127467972    31656.
##  2 Hong Kong, China Asia       2007    82.2   6980412    39725.
##  3 Japan            Asia       2002    82   127065841    28605.
##  4 Iceland          Europe     2007    81.8    301931    36181.
##  5 Switzerland      Europe     2007    81.7   7554661    37506.
##  6 Hong Kong, China Asia       2002    81.5   6762476    30209.
##  7 Australia        Oceania    2007    81.2  20434176    34435.
##  8 Spain            Europe     2007    80.9  40448191    28821.
##  9 Sweden           Europe     2007    80.9   9031088    33860.
## 10 Israel           Asia       2007    80.7   6426679    25523.
## # ℹ 1,694 more rows
#7. Load and extract 1957 and sort pop in descending order
# Restrict to 1957 first, then order the countries from most to least populous.
gapminder %>%
  filter(year == 1957) %>%
  arrange(desc(pop))
## # A tibble: 142 × 6
##    country        continent  year lifeExp       pop gdpPercap
##    <fct>          <fct>     <int>   <dbl>     <int>     <dbl>
##  1 China          Asia       1957    50.5 637408000      576.
##  2 India          Asia       1957    40.2 409000000      590.
##  3 United States  Americas   1957    69.5 171984000    14847.
##  4 Japan          Asia       1957    65.5  91563009     4318.
##  5 Indonesia      Asia       1957    39.9  90124000      859.
##  6 Germany        Europe     1957    69.1  71019069    10188.
##  7 Brazil         Americas   1957    53.3  65551171     2487.
##  8 United Kingdom Europe     1957    70.4  51430000    11283.
##  9 Bangladesh     Asia       1957    39.3  51365468      662.
## 10 Italy          Europe     1957    67.8  49182000     6249.
## # ℹ 132 more rows
#8. Create a new attribute as lifeExpMonths
# lifeExp is measured in years; multiplying by 12 expresses it in months as an
# additional column.
mutate(gapminder, lifeExpMonths = lifeExp * 12)
## # A tibble: 1,704 × 7
##    country     continent  year lifeExp      pop gdpPercap lifeExpMonths
##    <fct>       <fct>     <int>   <dbl>    <int>     <dbl>         <dbl>
##  1 Afghanistan Asia       1952    28.8  8425333      779.          346.
##  2 Afghanistan Asia       1957    30.3  9240934      821.          364.
##  3 Afghanistan Asia       1962    32.0 10267083      853.          384.
##  4 Afghanistan Asia       1967    34.0 11537966      836.          408.
##  5 Afghanistan Asia       1972    36.1 13079460      740.          433.
##  6 Afghanistan Asia       1977    38.4 14880372      786.          461.
##  7 Afghanistan Asia       1982    39.9 12881816      978.          478.
##  8 Afghanistan Asia       1987    40.8 13867957      852.          490.
##  9 Afghanistan Asia       1992    41.7 16317921      649.          500.
## 10 Afghanistan Asia       1997    41.8 22227415      635.          501.
## # ℹ 1,694 more rows
#9. Convert the existing attribute lifeExp into months
# Reusing the column name replaces lifeExp in the printed result. The result is
# not assigned, so the gapminder object itself is left unchanged.
mutate(gapminder, lifeExp = lifeExp * 12)
## # A tibble: 1,704 × 6
##    country     continent  year lifeExp      pop gdpPercap
##    <fct>       <fct>     <int>   <dbl>    <int>     <dbl>
##  1 Afghanistan Asia       1952    346.  8425333      779.
##  2 Afghanistan Asia       1957    364.  9240934      821.
##  3 Afghanistan Asia       1962    384. 10267083      853.
##  4 Afghanistan Asia       1967    408. 11537966      836.
##  5 Afghanistan Asia       1972    433. 13079460      740.
##  6 Afghanistan Asia       1977    461. 14880372      786.
##  7 Afghanistan Asia       1982    478. 12881816      978.
##  8 Afghanistan Asia       1987    490. 13867957      852.
##  9 Afghanistan Asia       1992    500. 16317921      649.
## 10 Afghanistan Asia       1997    501. 22227415      635.
## # ℹ 1,694 more rows
#10. Extract Year 2007, lifeExpMonths and arrange in Desc
# Filter to 2007, derive life expectancy in months, then rank the countries
# from the longest to the shortest life expectancy.
gapminder %>%
  filter(year == 2007) %>%
  mutate(lifeExpMonths = lifeExp * 12) %>%
  arrange(desc(lifeExpMonths))
## # A tibble: 142 × 7
##    country          continent  year lifeExp       pop gdpPercap lifeExpMonths
##    <fct>            <fct>     <int>   <dbl>     <int>     <dbl>         <dbl>
##  1 Japan            Asia       2007    82.6 127467972    31656.          991.
##  2 Hong Kong, China Asia       2007    82.2   6980412    39725.          986.
##  3 Iceland          Europe     2007    81.8    301931    36181.          981.
##  4 Switzerland      Europe     2007    81.7   7554661    37506.          980.
##  5 Australia        Oceania    2007    81.2  20434176    34435.          975.
##  6 Spain            Europe     2007    80.9  40448191    28821.          971.
##  7 Sweden           Europe     2007    80.9   9031088    33860.          971.
##  8 Israel           Asia       2007    80.7   6426679    25523.          969.
##  9 France           Europe     2007    80.7  61083916    30470.          968.
## 10 Canada           Americas   2007    80.7  33390141    36319.          968.
## # ℹ 132 more rows
#11. Create gapminder_1952
# Subset holding only the 1952 observations; the plots further down reuse it.
gapminder_1952 <- filter(gapminder, year == 1952)
# Show the subset.
gapminder_1952
## # A tibble: 142 × 6
##    country     continent  year lifeExp      pop gdpPercap
##    <fct>       <fct>     <int>   <dbl>    <int>     <dbl>
##  1 Afghanistan Asia       1952    28.8  8425333      779.
##  2 Albania     Europe     1952    55.2  1282697     1601.
##  3 Algeria     Africa     1952    43.1  9279525     2449.
##  4 Angola      Africa     1952    30.0  4232095     3521.
##  5 Argentina   Americas   1952    62.5 17876956     5911.
##  6 Australia   Oceania    1952    69.1  8691212    10040.
##  7 Austria     Europe     1952    66.8  6927772     6137.
##  8 Bahrain     Asia       1952    50.9   120447     9867.
##  9 Bangladesh  Asia       1952    37.5 46886859      684.
## 10 Belgium     Europe     1952    68    8730405     8343.
## # ℹ 132 more rows
#12. Using plot and qplot visualize gapminder_1952
# Base-graphics scatter plot with both axes on a log scale. The x values are
# gdpPercap so that the plotted data agree with the title and the x-axis label.
plot(gapminder_1952$gdpPercap, gapminder_1952$lifeExp,
     main = "21MIC0066 GDP per Capita vs Life Expectancy",
     xlab = "GDP per Capita",
     ylab = "Life Expectancy", log = "xy")

# ggplot2 quick-plot version of GDP per capita against life expectancy, with
# log10 scales applied to both axes.
qplot(
  x = gdpPercap,
  y = lifeExp,
  data = gapminder_1952,
  main = "21MIC0066 GDP per capita vs Life Expectancy",
  xlab = "GDP per Capita",
  ylab = "Life Expectancy"
) +
  scale_x_log10() +
  scale_y_log10()
## Warning: `qplot()` was deprecated in ggplot2 3.4.0.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

#13. Using ggplot for the data gapminder_1952 Visualize boxplot pop Vs gdpPercap
# pop is continuous, so geom_boxplot() needs an explicit group aesthetic to
# draw more than one box. cut_number() splits the countries into five
# population bands holding roughly the same number of countries each.
# NOTE(review): the bands are drawn on a linear x axis; add scale_x_log10() if
# the low-population boxes turn out too cramped to read.
ggplot(data = gapminder_1952, aes(x = pop, y = gdpPercap)) +
  geom_boxplot(aes(group = cut_number(pop, 5))) +
  ggtitle("21MIC0066 Population vs GDP per capita") +
  xlab("Population") +
  ylab("Gdp per capita")

#14. Visualize scatter plot for the data gapminder_1952, pop Vs gdpPercap and Scale both x and y axis.
# Both variables span several orders of magnitude, hence log10 on both axes.
ggplot(gapminder_1952, aes(x = pop, y = gdpPercap)) +
  geom_point() +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    title = "21MIC0066 Population vs GDP per capita",
    x = "Population",
    y = "Gdp per capita"
  )

#15. Visualize scatter plot for the data gapminder_1952, pop Vs lifeExp, differentiate color for continent and size for gdpPercap
# Point colour encodes the continent and point size encodes GDP per capita.
ggplot(
  gapminder_1952,
  aes(x = pop, y = lifeExp, color = continent, size = gdpPercap)
) +
  geom_point() +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    title = "21MIC0066 Population vs Life Expectancy",
    x = "Population",
    y = "Life Expectancy"
  )

#16. For gapminder_1952, pop Vs lifeExp, Scatter plot, Subgraph for continent
# facet_wrap() draws one panel per continent; the points are also coloured by
# continent.
ggplot(gapminder_1952, aes(x = pop, y = lifeExp, color = continent)) +
  geom_point() +
  facet_wrap(~ continent) +
  labs(
    title = "21MIC0066 Population vs Life Expectancy",
    x = "Population",
    y = "Life Expectancy"
  )

#17. For gapminder, pop vs lifeExp, scatter plot, subgraph for year
# Uses the full dataset this time, with one panel per survey year.
ggplot(gapminder, aes(x = pop, y = lifeExp)) +
  geom_point() +
  facet_wrap(~ year) +
  labs(
    title = "21MIC0066 Population vs Life Expectancy",
    x = "Population",
    y = "Life Expectancy"
  )

#18. For gapminder, Summarize mean and median for lifeExp
# Collapse the whole dataset into a single row holding both statistics.
gapminder %>%
  summarise(
    meanLifeExp = mean(lifeExp),
    medianLifeExp = median(lifeExp)
  )

#19. For gapminder 1957, median - lifeExp, max gdpPercap
# Restrict to 1957 before summarising so both statistics describe that year
# only.
gapminder %>%
  filter(year == 1957) %>%
  summarise(
    medianLifeExp = median(lifeExp),
    maxGdpPercap = max(gdpPercap)
  )

#20. For gapminder group by Continent and year, summarize median - lifeExp and max - gdpPercap