Use the gapminder package and its data set to answer the following questions. Please write your commands under each question.

1. Get the data for 2002. Assign a name to that data.

# Keep only the observations recorded in 2002 and store them under a name.
data_2002 <- filter(gapminder, year == 2002)
data_2002
## # A tibble: 142 × 6
##    country     continent  year lifeExp       pop gdpPercap
##    <fct>       <fct>     <int>   <dbl>     <int>     <dbl>
##  1 Afghanistan Asia       2002    42.1  25268405      727.
##  2 Albania     Europe     2002    75.7   3508512     4604.
##  3 Algeria     Africa     2002    71.0  31287142     5288.
##  4 Angola      Africa     2002    41.0  10866106     2773.
##  5 Argentina   Americas   2002    74.3  38331121     8798.
##  6 Australia   Oceania    2002    80.4  19546792    30688.
##  7 Austria     Europe     2002    79.0   8148312    32418.
##  8 Bahrain     Asia       2002    74.8    656397    23404.
##  9 Bangladesh  Asia       2002    62.0 135656790     1136.
## 10 Belgium     Europe     2002    78.3  10311970    30486.
## # ℹ 132 more rows

2. Get the data for Germany in 2002.

# One row is expected: the record where both the country and the year match.
germany_2002 <- gapminder |>
  filter(country == "Germany" & year == 2002)
germany_2002
## # A tibble: 1 × 6
##   country continent  year lifeExp      pop gdpPercap
##   <fct>   <fct>     <int>   <dbl>    <int>     <dbl>
## 1 Germany Europe     2002    78.7 82350671    30036.

3. Find which country has the lowest lifeExp overall.

# Narrow to the two columns of interest first, then keep the row(s) whose
# life expectancy equals the minimum over every country and year.
lowest_lifeExp_overall <- gapminder |>
  select(country, lifeExp) |>
  filter(lifeExp == min(lifeExp))
lowest_lifeExp_overall
## # A tibble: 1 × 2
##   country lifeExp
##   <fct>     <dbl>
## 1 Rwanda     23.6

4. Find which country has the lowest lifeExp in 2002.

# Restrict to 2002 before taking the minimum, so min() only sees that year.
lowest_lifeExp_2002 <- gapminder |>
  filter(year == 2002) |>
  select(country, lifeExp) |>
  filter(lifeExp == min(lifeExp))
lowest_lifeExp_2002
## # A tibble: 1 × 2
##   country lifeExp
##   <fct>     <dbl>
## 1 Zambia     39.2

5. Find the lifeExp in Germany in 2002.

# Life expectancy for Germany in 2002, reported next to the country name.
lifeExp_germany_2002 <- gapminder |>
  filter(country == "Germany" & year == 2002) |>
  select(country, lifeExp)
lifeExp_germany_2002
## # A tibble: 1 × 2
##   country lifeExp
##   <fct>     <dbl>
## 1 Germany    78.7

6. Find the countries whose lifeExp is higher than 80 in 2002.

# Countries whose 2002 life expectancy is strictly above 80 years.
high_lifeExp_countries_2002 <- gapminder |>
  filter(year == 2002) |>
  filter(lifeExp > 80) |>
  select(country, lifeExp)
high_lifeExp_countries_2002
## # A tibble: 7 × 2
##   country          lifeExp
##   <fct>              <dbl>
## 1 Australia           80.4
## 2 Hong Kong, China    81.5
## 3 Iceland             80.5
## 4 Italy               80.2
## 5 Japan               82  
## 6 Sweden              80.0
## 7 Switzerland         80.6

7. Find the countries whose lifeExp is more than 70 and less than 80.

# Every country-year record with life expectancy strictly between 70 and 80.
# No year filter is applied, so a country can appear more than once.
mid_lifeExp_countries <- gapminder |>
  filter(lifeExp > 70, lifeExp < 80) |>
  select(country, lifeExp)
mid_lifeExp_countries
## # A tibble: 471 × 2
##    country   lifeExp
##    <fct>       <dbl>
##  1 Albania      70.4
##  2 Albania      72  
##  3 Albania      71.6
##  4 Albania      73.0
##  5 Albania      75.7
##  6 Albania      76.4
##  7 Algeria      71.0
##  8 Algeria      72.3
##  9 Argentina    70.8
## 10 Argentina    71.9
## # ℹ 461 more rows

8. Find the lifeExp in Europe across the years. In which year is the lifeExp in Europe the highest?

# Average life expectancy across European countries, one row per survey year.
europe_lifeExp <- gapminder |>
  filter(continent == "Europe") |>
  group_by(year) |>
  summarise(avg_lifeExp = mean(lifeExp))

# The year(s) in which that European average peaks.
highest_lifeExp_year <- filter(europe_lifeExp, avg_lifeExp == max(avg_lifeExp))
highest_lifeExp_year
## # A tibble: 1 × 2
##    year avg_lifeExp
##   <int>       <dbl>
## 1  2007        77.6

9. Define gdp as gdpPercap * pop / 10000. Find the gdp of Europe in 2002.

# Add the derived gdp column (gdpPercap * pop / 10000) to the 2002 data.
data_2002 <- mutate(data_2002, gdp = gdpPercap * pop / 10000)

# Total gdp over all European countries in 2002.
europe_gdp_2002 <- data_2002 |>
  filter(continent == "Europe") |>
  summarise(total_gdp = sum(gdp))
europe_gdp_2002
## # A tibble: 1 × 1
##     total_gdp
##         <dbl>
## 1 1309346445.

10. Which country has the highest gdp in Europe in 2002?

# Among European countries in the 2002 data, keep the one(s) whose gdp equals
# the European maximum. Relies on the gdp column already added to data_2002.
highest_gdp_country <- data_2002 |>
  filter(continent == "Europe") |>
  select(country, gdp) |>
  filter(gdp == max(gdp))
highest_gdp_country
## # A tibble: 1 × 2
##   country        gdp
##   <fct>        <dbl>
## 1 Germany 247346845.

11. Save the data in 2002 in Europe. Call it data_2002.

# Overwrite data_2002 with the European subset of the 2002 observations.
data_2002 <- filter(gapminder, continent == "Europe", year == 2002)
data_2002
## # A tibble: 30 × 6
##    country                continent  year lifeExp      pop gdpPercap
##    <fct>                  <fct>     <int>   <dbl>    <int>     <dbl>
##  1 Albania                Europe     2002    75.7  3508512     4604.
##  2 Austria                Europe     2002    79.0  8148312    32418.
##  3 Belgium                Europe     2002    78.3 10311970    30486.
##  4 Bosnia and Herzegovina Europe     2002    74.1  4165416     6019.
##  5 Bulgaria               Europe     2002    72.1  7661799     7697.
##  6 Croatia                Europe     2002    74.9  4481020    11628.
##  7 Czech Republic         Europe     2002    75.5 10256295    17596.
##  8 Denmark                Europe     2002    77.2  5374693    32167.
##  9 Finland                Europe     2002    78.4  5193039    28205.
## 10 France                 Europe     2002    79.6 59925035    28926.
## # ℹ 20 more rows

12. Use data_2002. Use ggplot. Plot gdpPercap vs lifeExp.

# Scatter plot of life expectancy against GDP per capita for data_2002.
ggplot(
  data_2002,
  aes(x = gdpPercap, y = lifeExp)
) +
  geom_point() +
  ggtitle("GDP per Capita vs Life Expectancy in 2002") +
  xlab("GDP per Capita") +
  ylab("Life Expectancy")

13. Use data_2002. Use ggplot. Plot gdpPercap vs lifeExp by continent (color)

# Same scatter plot, with points coloured by the continent column.
ggplot(
  data_2002,
  aes(x = gdpPercap, y = lifeExp, color = continent)
) +
  geom_point() +
  ggtitle("GDP per Capita vs Life Expectancy in 2002 by Continent") +
  xlab("GDP per Capita") +
  ylab("Life Expectancy")

14. Use data_2002. Use ggplot. Plot gdpPercap vs lifeExp by continent and pop (color and size)

# Scatter plot with colour mapped to continent and point size mapped to pop.
ggplot(
  data_2002,
  aes(x = gdpPercap, y = lifeExp, color = continent, size = pop)
) +
  geom_point() +
  ggtitle("GDP per Capita vs Life Expectancy in 2002 by Continent and Population") +
  xlab("GDP per Capita") +
  ylab("Life Expectancy")

15. Get data for Europe in 2002. Call it data_Europe

# European countries in 2002, stored separately for the plots that follow.
data_Europe <- filter(gapminder, continent == "Europe" & year == 2002)
data_Europe
## # A tibble: 30 × 6
##    country                continent  year lifeExp      pop gdpPercap
##    <fct>                  <fct>     <int>   <dbl>    <int>     <dbl>
##  1 Albania                Europe     2002    75.7  3508512     4604.
##  2 Austria                Europe     2002    79.0  8148312    32418.
##  3 Belgium                Europe     2002    78.3 10311970    30486.
##  4 Bosnia and Herzegovina Europe     2002    74.1  4165416     6019.
##  5 Bulgaria               Europe     2002    72.1  7661799     7697.
##  6 Croatia                Europe     2002    74.9  4481020    11628.
##  7 Czech Republic         Europe     2002    75.5 10256295    17596.
##  8 Denmark                Europe     2002    77.2  5374693    32167.
##  9 Finland                Europe     2002    78.4  5193039    28205.
## 10 France                 Europe     2002    79.6 59925035    28926.
## # ℹ 20 more rows

16. Use data_Europe. Use ggplot. Plot pop vs gdpPercap.

# GDP per capita against population for the European 2002 data (linear axes).
ggplot(
  data_Europe,
  aes(x = pop, y = gdpPercap)
) +
  geom_point() +
  ggtitle("Population vs GDP per Capita in Europe (2002)") +
  xlab("Population") +
  ylab("GDP per Capita")

17. Use data_Europe. Use ggplot. Plot pop vs gdpPercap. Scale population by log10

# GDP per capita against population, with the x axis on a log10 scale.
# The scale itself is transformed with scale_x_log10() instead of wrapping the
# variable in log10() inside aes(): the points land in the same positions, but
# the axis ticks stay labelled in population units rather than log units.
ggplot(data_Europe, aes(x = pop, y = gdpPercap)) +
  geom_point() +
  scale_x_log10() +
  labs(title = "Population (log10 scale) vs GDP per Capita in Europe (2002)",
       x = "Population (log10 scale)",
       y = "GDP per Capita")

18. Use data_Europe. Use ggplot. Plot pop vs gdpPercap. Scale population by log10. Color the data by country.

# GDP per capita against population on a log10 x axis, coloured by country.
# scale_x_log10() transforms the axis rather than the data, so the tick labels
# remain in population units instead of log10(pop) values.
ggplot(data_Europe, aes(x = pop, y = gdpPercap, color = country)) +
  geom_point() +
  scale_x_log10() +
  labs(title = "Population (log10 scale) vs GDP per Capita in Europe (2002) by Country",
       x = "Population (log10 scale)",
       y = "GDP per Capita")

19. Use data_Europe. Use ggplot. Plot pop vs gdpPercap. Scale population by log10. Color the data by country and size it by lifeExp.

# GDP per capita against population on a log10 x axis; colour encodes the
# country and point size encodes life expectancy.
# scale_x_log10() transforms the axis rather than the data, so the tick labels
# remain in population units instead of log10(pop) values.
ggplot(data_Europe,
       aes(x = pop, y = gdpPercap, color = country, size = lifeExp)) +
  geom_point() +
  scale_x_log10() +
  labs(title = "Population (log10 scale) vs GDP per Capita in Europe (2002) by Country and Life Expectancy",
       x = "Population (log10 scale)",
       y = "GDP per Capita")

20. See the attached Excel file, namely tourism.xls. Create a folder and name it FORECASTING.

1) Save the tourism excel file in that FORECASTING directory.

2) Set your working directory as FORECASTING

3) Import the tourism Excel file into RStudio.

4) Assign a different name to this data, such as “mydata”

5) Check the structure of your dataset with the str() function. Change the Region column from character to factor. Use the as.factor() function.

# Session options: print 3 significant digits, strongly prefer fixed over
# scientific notation, and keep character data as character rather than
# factor (the 1st column, Quarter, must NOT become a factor).
# Option names are case-sensitive: the correct spelling is `stringsAsFactors`.
# A misspelled name is accepted silently by options() and has no effect.
options(digits = 3, scipen = 9999, stringsAsFactors = FALSE)

library(readxl)
## Warning: package 'readxl' was built under R version 4.3.2
# Point the session at the FORECASTING folder, then import the workbook and
# inspect the column types.
setwd("C://FORECASTING")
mydata <- read_excel("C://FORECASTING//tourism.xlsx")
str(mydata)
## tibble [24,320 × 5] (S3: tbl_df/tbl/data.frame)
##  $ Quarter: chr [1:24320] "1998-01-01" "1998-04-01" "1998-07-01" "1998-10-01" ...
##  $ Region : chr [1:24320] "Adelaide" "Adelaide" "Adelaide" "Adelaide" ...
##  $ State  : chr [1:24320] "South Australia" "South Australia" "South Australia" "South Australia" ...
##  $ Purpose: chr [1:24320] "Business" "Business" "Business" "Business" ...
##  $ Trips  : num [1:24320] 135 110 166 127 137 ...
# Convert Region from character to factor, then re-check the structure.
mydata[["Region"]] <- as.factor(mydata[["Region"]])
str(mydata)
## tibble [24,320 × 5] (S3: tbl_df/tbl/data.frame)
##  $ Quarter: chr [1:24320] "1998-01-01" "1998-04-01" "1998-07-01" "1998-10-01" ...
##  $ Region : Factor w/ 76 levels "Adelaide","Adelaide Hills",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ State  : chr [1:24320] "South Australia" "South Australia" "South Australia" "South Australia" ...
##  $ Purpose: chr [1:24320] "Business" "Business" "Business" "Business" ...
##  $ Trips  : num [1:24320] 135 110 166 127 137 ...