Use gapminder package and data set to answer the following questions. Please write your commands under each question.

1. Get the data for 2002. Assign a name to that data.

# Keep only the 2002 observations. `year` is an integer column, so it is
# compared with a number rather than the string "2002" (which only matched
# through implicit type coercion).
Data_2002 <- gapminder %>%
  filter(year == 2002)
Data_2002
## # A tibble: 142 × 6
##    country     continent  year lifeExp       pop gdpPercap
##    <fct>       <fct>     <int>   <dbl>     <int>     <dbl>
##  1 Afghanistan Asia       2002    42.1  25268405      727.
##  2 Albania     Europe     2002    75.7   3508512     4604.
##  3 Algeria     Africa     2002    71.0  31287142     5288.
##  4 Angola      Africa     2002    41.0  10866106     2773.
##  5 Argentina   Americas   2002    74.3  38331121     8798.
##  6 Australia   Oceania    2002    80.4  19546792    30688.
##  7 Austria     Europe     2002    79.0   8148312    32418.
##  8 Bahrain     Asia       2002    74.8    656397    23404.
##  9 Bangladesh  Asia       2002    62.0 135656790     1136.
## 10 Belgium     Europe     2002    78.3  10311970    30486.
## # ℹ 132 more rows

2. Get the data for Germany in 2002.

# Single row for Germany in 2002. `year` is an integer column, so compare it
# with a number instead of the string "2002".
Germany_data <- gapminder %>%
  filter(country == "Germany", year == 2002)
Germany_data
## # A tibble: 1 × 6
##   country continent  year lifeExp      pop gdpPercap
##   <fct>   <fct>     <int>   <dbl>    <int>     <dbl>
## 1 Germany Europe     2002    78.7 82350671    30036.

3. Find which country has the lowest lifeExp overall.

# Country/year observation with the smallest life expectancy in the whole
# data set; only the country and the value are reported.
LowLifeExp <- select(gapminder, country, lifeExp) %>%
  filter(lifeExp == min(lifeExp))
LowLifeExp
## # A tibble: 1 × 2
##   country lifeExp
##   <fct>     <dbl>
## 1 Rwanda     23.6

4. Find which country has the lowest lifeExp in 2002.

# Country with the smallest life expectancy among the 2002 observations.
# The minimum is taken after restricting to 2002, so it is the 2002 minimum.
# `year` is an integer column, so compare it with a number, not "2002".
LowLifeExp2002 <- gapminder %>%
  filter(year == 2002) %>%
  filter(lifeExp == min(lifeExp)) %>%
  select(country, lifeExp)
LowLifeExp2002
## # A tibble: 1 × 2
##   country lifeExp
##   <fct>     <dbl>
## 1 Zambia     39.2

5. Find the lifeExp in Germany in 2002.

# Life expectancy of Germany in 2002. `year` is an integer column, so compare
# it with a number instead of the string "2002".
LifeExpGermany <- gapminder %>%
  filter(country == "Germany", year == 2002) %>%
  select(country, lifeExp)
LifeExpGermany
## # A tibble: 1 × 2
##   country lifeExp
##   <fct>     <dbl>
## 1 Germany    78.7

6. Find the countries whose lifeExp is higher than 80 in 2002.

# Countries whose 2002 life expectancy is strictly above 80.
# Uses `<-` for assignment like the rest of the file, and compares the integer
# `year` column with a number instead of the string "2002".
HighLifeExp <- gapminder %>%
  filter(year == 2002, lifeExp > 80) %>%
  select(country, lifeExp)
HighLifeExp
## # A tibble: 7 × 2
##   country          lifeExp
##   <fct>              <dbl>
## 1 Australia           80.4
## 2 Hong Kong, China    81.5
## 3 Iceland             80.5
## 4 Italy               80.2
## 5 Japan               82  
## 6 Sweden              80.0
## 7 Switzerland         80.6

7. Find the countries whose lifeExp is more than 70 and less than 80

# Countries with a life expectancy strictly between 70 and 80.
# No year filter is applied, so a country is listed if any of its observations
# falls in that range; distinct() reports each such country once.
# Uses `<-` for assignment like the rest of the file.
LifeExpRange <- gapminder %>%
  filter(lifeExp > 70 & lifeExp < 80) %>%
  distinct(country)
LifeExpRange
## # A tibble: 84 × 1
##    country               
##    <fct>                 
##  1 Albania               
##  2 Algeria               
##  3 Argentina             
##  4 Australia             
##  5 Austria               
##  6 Bahrain               
##  7 Belgium               
##  8 Bosnia and Herzegovina
##  9 Brazil                
## 10 Bulgaria              
## # ℹ 74 more rows

8. Find the lifeExp in Europe across the years. Which year is the highest lifeExp in Europe?

# Mean life expectancy of the European countries, one row per survey year.
LifeExpEurope <- filter(gapminder, continent == "Europe") %>%
  group_by(year) %>%
  summarise(avg_lifeExp = mean(lifeExp))
LifeExpEurope
## # A tibble: 12 × 2
##     year avg_lifeExp
##    <int>       <dbl>
##  1  1952        64.4
##  2  1957        66.7
##  3  1962        68.5
##  4  1967        69.7
##  5  1972        70.8
##  6  1977        71.9
##  7  1982        72.8
##  8  1987        73.6
##  9  1992        74.4
## 10  1997        75.5
## 11  2002        76.7
## 12  2007        77.6
# Year in which the European average life expectancy is at its maximum.
HighLifeExpEurope <- filter(LifeExpEurope, avg_lifeExp == max(avg_lifeExp))
HighLifeExpEurope
## # A tibble: 1 × 2
##    year avg_lifeExp
##   <int>       <dbl>
## 1  2007        77.6

9. Define gdp as gdpPercap * pop / 10000. Find the gdp of Europe in 2002.

# Total gdp of Europe in 2002, with gdp = gdpPercap * pop / 10000 as defined
# in the question. The divisor is 10000 (not 1000), matching the per-country
# gdp used in question 10.
EuropeGDP2002 <- gapminder %>%
  filter(continent == "Europe" & year == 2002) %>%
  mutate(GDP = (gdpPercap * pop) / 10000) %>%
  summarise(total_gdp = sum(GDP))
EuropeGDP2002
## # A tibble: 1 × 1
##     total_gdp
##         <dbl>
## 1 1309346445.

10. Which country has the highest gdp in Europe in 2002 ?

# European country with the largest gdp in 2002, where
# gdp = gdpPercap * pop / 10000.
Highest_GDP_Europe_2002 <- filter(gapminder, continent == "Europe", year == 2002) %>%
  mutate(gdp = gdpPercap * pop / 10000) %>%
  select(country, gdp) %>%
  filter(gdp == max(gdp))
Highest_GDP_Europe_2002
## # A tibble: 1 × 2
##   country        gdp
##   <fct>        <dbl>
## 1 Germany 247346845.

11. Save the data in 2002 in Europe. Call it data_2002.

# European observations for 2002, stored for the plots that follow.
data_2002 <- filter(gapminder, continent == "Europe", year == 2002)
data_2002
## # A tibble: 30 × 6
##    country                continent  year lifeExp      pop gdpPercap
##    <fct>                  <fct>     <int>   <dbl>    <int>     <dbl>
##  1 Albania                Europe     2002    75.7  3508512     4604.
##  2 Austria                Europe     2002    79.0  8148312    32418.
##  3 Belgium                Europe     2002    78.3 10311970    30486.
##  4 Bosnia and Herzegovina Europe     2002    74.1  4165416     6019.
##  5 Bulgaria               Europe     2002    72.1  7661799     7697.
##  6 Croatia                Europe     2002    74.9  4481020    11628.
##  7 Czech Republic         Europe     2002    75.5 10256295    17596.
##  8 Denmark                Europe     2002    77.2  5374693    32167.
##  9 Finland                Europe     2002    78.4  5193039    28205.
## 10 France                 Europe     2002    79.6 59925035    28926.
## # ℹ 20 more rows

12. Use data_2002. Use ggplot. Plot gdpPercap vs lifeExp.

# Scatter plot of life expectancy (y) against GDP per capita (x).
data_2002 %>%
  ggplot(aes(gdpPercap, lifeExp)) +
  geom_point()

13. Use data_2002. Use ggplot. Plot gdpPercap vs lifeExp by continent (color)

# Same scatter plot, with the points coloured by continent.
ggplot(
  data = data_2002,
  mapping = aes(x = gdpPercap, y = lifeExp, color = continent)
) +
  geom_point()

14. Use data_2002. Use ggplot. Plot gdpPercap vs lifeExp by continent and pop (color and size)

# Scatter plot with colour mapped to continent and point size to population.
data_2002 %>%
  ggplot(aes(x = gdpPercap, y = lifeExp, color = continent, size = pop)) +
  geom_point()

15. Get data for Europe in 2002. Call it data_Europe

# European observations for 2002. `year` is an integer column, so compare it
# with a number instead of the string "2002".
data_Europe <- gapminder %>%
  filter(continent == "Europe", year == 2002)
data_Europe
## # A tibble: 30 × 6
##    country                continent  year lifeExp      pop gdpPercap
##    <fct>                  <fct>     <int>   <dbl>    <int>     <dbl>
##  1 Albania                Europe     2002    75.7  3508512     4604.
##  2 Austria                Europe     2002    79.0  8148312    32418.
##  3 Belgium                Europe     2002    78.3 10311970    30486.
##  4 Bosnia and Herzegovina Europe     2002    74.1  4165416     6019.
##  5 Bulgaria               Europe     2002    72.1  7661799     7697.
##  6 Croatia                Europe     2002    74.9  4481020    11628.
##  7 Czech Republic         Europe     2002    75.5 10256295    17596.
##  8 Denmark                Europe     2002    77.2  5374693    32167.
##  9 Finland                Europe     2002    78.4  5193039    28205.
## 10 France                 Europe     2002    79.6 59925035    28926.
## # ℹ 20 more rows

16. Use data_Europe. Use ggplot. Plot pop vs gdpPercap.

17. Use data_Europe. Use ggplot. Plot pop vs gdpPercap. Scale population by log10

# GDP per capita against population. The population is log10-transformed
# inside aes(), so the x axis shows log10(pop) values and is labelled as such
# rather than as raw "Population".
ggplot(data_Europe, aes(x = log10(pop), y = gdpPercap)) +
  geom_point(color = "blue", alpha = 0.7) +
  labs(
    title = "Population vs GDP per Capita in Europe",
    x = "Log10(Population)",
    y = "GDP per Capita"
  ) +
  theme_minimal()

18. Use data_Europe. Use ggplot. Plot pop vs gdpPercap. Scale population by log10. Color the data by country.

# GDP per capita against log10 population, one colour per country.
# The x axis shows log10(pop) values, so it is labelled "Log10(Population)"
# rather than raw "Population". The legend is placed under the plot.
ggplot(data_Europe, aes(x = log10(pop), y = gdpPercap, color = country)) +
  geom_point(size = 3, alpha = 0.7) +
  labs(
    title = "Population vs GDP per Capita in Europe",
    x = "Log10(Population)",
    y = "GDP per Capita",
    color = "Country"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")

19. Use data_Europe. Use ggplot. Plot pop vs gdpPercap. Scale population by log10. Color the data by country and size it by lifeExp.

# GDP per capita against log10 population; colour identifies the country and
# point size encodes life expectancy. The legend is placed under the plot.
data_Europe %>%
  ggplot(
    aes(x = log10(pop), y = gdpPercap, color = country, size = lifeExp)
  ) +
  geom_point(alpha = 0.7) +
  ggtitle("Population vs GDP per Capita in Europe") +
  xlab("Log10(Population)") +
  ylab("GDP per Capita") +
  labs(color = "Country", size = "Life Expectancy") +
  theme_minimal() +
  theme(legend.position = "bottom")

20. See the attached Excel file, namely tourism.xls. Create a folder and name it FORECASTING.

1) Save the tourism excel file in that FORECASTING directory.

2) Set your working directory as FORECASTING

3) Import tourism excel file into R-studio.

4) Assign a different name to this data, such as “mydata”

5) Check the structure of your dataset by str() function. Change Region column from character to factor. Use as.factor() function.

# Session options: print 3 significant digits, penalise scientific notation
# heavily so numbers print in fixed notation, and ask R not to turn strings
# into factors. The option is spelled `stringsAsFactors`; the previous
# `stringasFactors` only created an unrelated option that nothing reads.
# make sure characters are not factors. The 1st column, Quarter, needs to be NOT factor.
options(digits = 3, scipen = 9999, stringsAsFactors = FALSE)

library(readxl)
## Warning: package 'readxl' was built under R version 4.3.3
# Import the workbook once.
tourism_3 <- read_excel("Business Forecasting/tourism-3.xlsx")
# View() opens the data viewer, which is only meaningful in an interactive
# session, so it is skipped when the script is rendered or run in batch.
if (interactive()) {
  View(tourism_3)
}
# Give the imported data a second name instead of reading the file again.
mydata <- tourism_3
# Inspect the column types before converting Region.
str(mydata)
## tibble [24,320 × 5] (S3: tbl_df/tbl/data.frame)
##  $ Quarter: chr [1:24320] "1998-01-01" "1998-04-01" "1998-07-01" "1998-10-01" ...
##  $ Region : chr [1:24320] "Adelaide" "Adelaide" "Adelaide" "Adelaide" ...
##  $ State  : chr [1:24320] "South Australia" "South Australia" "South Australia" "South Australia" ...
##  $ Purpose: chr [1:24320] "Business" "Business" "Business" "Business" ...
##  $ Trips  : num [1:24320] 135 110 166 127 137 ...
# Convert the Region column from character to factor, then re-check the
# structure to confirm the new type.
mydata[["Region"]] <- as.factor(mydata[["Region"]])
str(mydata)
## tibble [24,320 × 5] (S3: tbl_df/tbl/data.frame)
##  $ Quarter: chr [1:24320] "1998-01-01" "1998-04-01" "1998-07-01" "1998-10-01" ...
##  $ Region : Factor w/ 76 levels "Adelaide","Adelaide Hills",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ State  : chr [1:24320] "South Australia" "South Australia" "South Australia" "South Australia" ...
##  $ Purpose: chr [1:24320] "Business" "Business" "Business" "Business" ...
##  $ Trips  : num [1:24320] 135 110 166 127 137 ...