# Task: work with the `economics` dataset and visualize `psavert`, the
# personal saving rate of people in the USA. Plot it as a time series to see
# whether the saving rate rose or fell between 1967 and 2015.
# Data preprocessing

# Dataset: load it and show the first 6 rows

data(economics)
head(economics, n = 6)
## # A tibble: 6 × 6
##   date         pce    pop psavert uempmed unemploy
##   <date>     <dbl>  <dbl>   <dbl>   <dbl>    <dbl>
## 1 1967-07-01  507. 198712    12.6     4.5     2944
## 2 1967-08-01  510. 198911    12.6     4.7     2945
## 3 1967-09-01  516. 199113    11.9     4.6     2958
## 4 1967-10-01  512. 199311    12.9     4.9     3143
## 5 1967-11-01  517. 199498    12.8     4.7     3066
## 6 1967-12-01  525. 199657    11.8     4.8     3018

# Show the last 6 rows

tail(economics, 6)
## # A tibble: 6 × 6
##   date          pce     pop psavert uempmed unemploy
##   <date>      <dbl>   <dbl>   <dbl>   <dbl>    <dbl>
## 1 2014-11-01 12051. 319564.     7.3    13       9090
## 2 2014-12-01 12062  319746.     7.6    12.9     8717
## 3 2015-01-01 12046  319929.     7.7    13.2     8903
## 4 2015-02-01 12082. 320075.     7.9    12.9     8610
## 5 2015-03-01 12158. 320231.     7.4    12       8504
## 6 2015-04-01 12194. 320402.     7.6    11.5     8526

# Visualize the data: `psavert` over `date` as a single line.

# `linewidth` is used for the line thickness because the `size` aesthetic for
# lines was deprecated in ggplot2 3.4.0 and triggers a warning.
ggplot(data = economics, mapping = aes(x = date, y = psavert)) +
  geom_line(color = "#7d1b32", linewidth = 0.7) +
  labs(
    title = "Personal Saving Rate in USA from 1967 to 2015",
    x = "Times",
    y = "Personal Saving Rate"
  )

# Download AAPL prices from 2021-01-01 onwards as a data frame.
# getSymbols() creates the `AAPL` object in the workspace itself and only
# returns the symbol name, so its return value is deliberately not assigned
# (assigning it to `apple` would just store the string "AAPL").
getSymbols("AAPL", return.class = "data.frame", from = "2021-01-01")
## [1] "AAPL"
tail(AAPL, n = 6)
##            AAPL.Open AAPL.High AAPL.Low AAPL.Close AAPL.Volume AAPL.Adjusted
## 2025-12-29    272.69    274.36   272.35     273.76    23715200        273.76
## 2025-12-30    272.81    274.08   272.28     273.08    22139600        273.08
## 2025-12-31    273.06    273.68   271.75     271.86    27293600        271.86
## 2026-01-02    272.26    277.84   269.00     271.01    37838100        271.01
## 2026-01-05    270.64    271.51   266.14     267.26    45647200        267.26
## 2026-01-06    267.00    267.55   262.12     262.36    52282100        262.36

# Preprocess the Apple data: turn the row names into a Date column, keep only
# the closing price (renamed to `Close`) and tag every row with the company.

apple <- AAPL %>%
  mutate(
    Date = as.Date(row.names(.)),
    Company = "Apple"
  ) %>%
  select(Date, Close = AAPL.Close, Company)
tail(apple, n = 6)
##                  Date  Close Company
## 2025-12-29 2025-12-29 273.76   Apple
## 2025-12-30 2025-12-30 273.08   Apple
## 2025-12-31 2025-12-31 271.86   Apple
## 2026-01-02 2026-01-02 271.01   Apple
## 2026-01-05 2026-01-05 267.26   Apple
## 2026-01-06 2026-01-06 262.36   Apple
# Download prices for the "FB" symbol from 2021-01-01 onwards as a data frame.
# getSymbols() creates the `FB` object in the workspace itself and only
# returns the symbol name, so its return value is deliberately not assigned
# (assigning it to `facebook` would just store the string "FB").
# NOTE(review): the Facebook (Meta) section later in this file downloads
# "META"; the prices printed below (~42 with very small volumes) suggest "FB"
# no longer tracks Facebook/Meta -- confirm and switch the ticker if so.
getSymbols("FB", return.class = "data.frame", from = "2021-01-01")
## [1] "FB"
tail(FB, n = 6)
##            FB.Open FB.High FB.Low FB.Close FB.Volume FB.Adjusted
## 2025-12-29  42.226  42.226 42.226   42.226       100      42.226
## 2025-12-30  42.160  42.210 42.160   42.210       200      42.210
## 2025-12-31  42.200  42.200 42.141   42.200      1200      42.200
## 2026-01-02  42.250  42.250 42.250   42.250       100      42.250
## 2026-01-05  42.340  42.340 42.340   42.340       100      42.340
## 2026-01-06  42.440  42.449 42.410   42.449      5300      42.449
# Preprocess the Facebook data: turn the row names into a Date column, keep
# only the closing price (renamed to `Close`) and tag every row with the
# company. The tag must be "Facebook" -- labelling these rows "Apple" would
# make the two series indistinguishable once they are combined by Company.
facebook <- FB %>%
  mutate(Date = as.Date(row.names(.))) %>%
  select(Date, FB.Close) %>%
  rename(Close = FB.Close) %>%
  mutate(Company = "Facebook")
tail(facebook, n = 6)
##                  Date  Close  Company
## 2025-12-29 2025-12-29 42.226 Facebook
## 2025-12-30 2025-12-30 42.210 Facebook
## 2025-12-31 2025-12-31 42.200 Facebook
## 2026-01-02 2026-01-02 42.250 Facebook
## 2026-01-05 2026-01-05 42.340 Facebook
## 2026-01-06 2026-01-06 42.449 Facebook
library(quantmod)
library(dplyr)
library(ggplot2)
library(scales)

# The workspace is deliberately not wiped with rm(list = ls()): every object
# used below is (re)created by this section, and clearing the environment
# would silently destroy unrelated objects in an interactive session.

# ===== Apple =====
# Download AAPL prices from Yahoo starting 2021-05-05; getSymbols() creates
# the `AAPL` object in the workspace and returns only the symbol name.
getSymbols("AAPL", src = "yahoo", from = "2021-05-05")
## [1] "AAPL"
# Flatten to a plain data frame: dates from the object's index, closing
# prices as a numeric vector, plus a company label for grouping in the plot.
apple <- data.frame(
  Date  = as.Date(index(AAPL)),
  Close = as.numeric(AAPL$AAPL.Close)
) %>%
  mutate(Company = "Apple")

# ===== Facebook (Meta) =====
# Download META prices from Yahoo starting 2021-05-05.
getSymbols("META", src = "yahoo", from = "2021-05-05")
## [1] "META"
# Dates come from the object's index, closing prices become a numeric vector;
# the company label is added afterwards.
facebook <- data.frame(
  Date  = as.Date(index(META)),
  Close = as.numeric(META$META.Close)
)
facebook <- mutate(facebook, Company = "Facebook")

# ===== Combine =====
# Last date to keep in the combined series.
end_date <- as.Date("2022-02-28")

# Stack both companies' rows, then drop everything after the cutoff date.
data_series <- rbind(apple, facebook) %>%
  filter(Date <= end_date)

# ===== Plot =====
# One line per company. `linewidth` is used for the line thickness because
# the `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ggplot(data_series, aes(Date, Close, color = Company)) +
  geom_line(linewidth = 1) +
  # One tick per month, labelled like "May-2021"
  scale_x_date(date_breaks = "1 month",
               labels = date_format("%b-%Y")) +
  # Tilt the month labels so they do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_y_continuous(labels = dollar) +
  # Zoom the y axis without dropping data outside the range
  coord_cartesian(ylim = c(100, 450)) +
  labs(
    title = "NASDAQ Closing Prices",
    subtitle = "From May 2021 to February 2022",
    caption = "source: Yahoo Finance",
    x = "",
    y = "Closing Price"
  ) +
  scale_color_brewer(palette = "Set1")

library(ggplot2)
library(tidyr)
library(gapminder)
library(dplyr)

data(gapminder)

# Subset the data: Asian countries, years 1952 and 2007 only
plotdata_long <- gapminder %>%
  filter(continent == "Asia", year %in% c(1952, 2007)) %>%
  select(country, year, lifeExp)

# Convert to wide format: one row per country and one life-expectancy column
# per year. pivot_wider() replaces the superseded spread(); the columns are
# named explicitly (year1952, year2007, Country) instead of by position, so
# the result does not depend on the column order.
plotdata_wide <- plotdata_long %>%
  pivot_wider(
    names_from = year,
    values_from = lifeExp,
    names_prefix = "year"
  ) %>%
  rename(Country = country)

# Dumbbell chart (built from segments and points instead of geom_dumbbell)
ggplot(plotdata_wide, aes(y = Country)) +
  # Connecting line between the two years (y is inherited from the plot)
  geom_segment(
    aes(x = year1952, xend = year2007, yend = Country),
    linewidth = 0.8,
    color = "grey70"
  ) +
  # Point for 1952
  geom_point(aes(x = year1952), size = 2, color = "steelblue") +
  # Point for 2007
  geom_point(aes(x = year2007), size = 2, color = "firebrick") +
  theme_minimal() +
  labs(
    title = "Life Expectancy in Asia (1952 vs 2007)",
    x = "Life Expectancy",
    y = ""
  )

library(ggplot2)
library(dplyr)

# Order the countries by their 1952 life expectancy so the chart is sorted
plotdata_wide <- mutate(plotdata_wide, Country = reorder(Country, year1952))

ggplot(plotdata_wide, aes(y = Country)) +
  # Connecting line between the two years (y is inherited from the plot)
  geom_segment(
    aes(x = year1952, xend = year2007, yend = Country),
    linewidth = 1.2,
    color = "grey60"
  ) +
  # Start point (1952)
  geom_point(aes(x = year1952), size = 3, color = "blue") +
  # End point (2007)
  geom_point(aes(x = year2007), size = 3, color = "red") +
  labs(
    title = "Change in Life Expectancy",
    subtitle = "From 1952 to 2007",
    x = "Life Expectancy (years)",
    y = ""
  ) +
  theme_minimal()