Đề bài

Làm việc với dữ liệu Economics và trực quan hóa biến psavert – tỷ lệ tiết kiệm cá nhân của người dân Mỹ. Chúng ta cần trực quan hóa dữ liệu theo chuỗi thời gian để xem xét tỷ lệ tiết kiệm của người dân Mỹ tăng hay giảm từ năm 1967 đến 2015.

## Tiền xử lý dữ liệu

### Tải thư viện

library(quantmod)
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(ggplot2)
library(dplyr)
## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:xts':
## 
##     first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(scales)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout

Tập dữ liệu

Hiển thị 6 dữ liệu đầu tiên

# Copy the `economics` data set into the workspace, then preview its first
# six rows to check the available columns and the start of the series.
data(list = "economics")
head(economics, n = 6L)
## # A tibble: 6 × 6
##   date         pce    pop psavert uempmed unemploy
##   <date>     <dbl>  <dbl>   <dbl>   <dbl>    <dbl>
## 1 1967-07-01  507. 198712    12.6     4.5     2944
## 2 1967-08-01  510. 198911    12.6     4.7     2945
## 3 1967-09-01  516. 199113    11.9     4.6     2958
## 4 1967-10-01  512. 199311    12.9     4.9     3143
## 5 1967-11-01  517. 199498    12.8     4.7     3066
## 6 1967-12-01  525. 199657    11.8     4.8     3018

Hiển thị 6 dữ liệu cuối cùng

# Preview the last six rows to see where the series ends.
tail(economics, n = 6L)
## # A tibble: 6 × 6
##   date          pce     pop psavert uempmed unemploy
##   <date>      <dbl>   <dbl>   <dbl>   <dbl>    <dbl>
## 1 2014-11-01 12051. 319564.     7.3    13       9090
## 2 2014-12-01 12062  319746.     7.6    12.9     8717
## 3 2015-01-01 12046  319929.     7.7    13.2     8903
## 4 2015-02-01 12082. 320075.     7.9    12.9     8610
## 5 2015-03-01 12158. 320231.     7.4    12       8504
## 6 2015-04-01 12194. 320402.     7.6    11.5     8526

Hiển thị 36 dữ liệu cuối cùng

# Request the final 36 rows (three years of monthly observations). Note that
# the tibble printer only displays the first 10 of them.
tail(x = economics, n = 36L)
## # A tibble: 36 × 6
##    date          pce     pop psavert uempmed unemploy
##    <date>      <dbl>   <dbl>   <dbl>   <dbl>    <dbl>
##  1 2012-05-01 10969. 313831.     8.8    19.9    12660
##  2 2012-06-01 10946. 314018.     9.1    20.4    12692
##  3 2012-07-01 10977. 314211.     8.2    17.5    12656
##  4 2012-08-01 11004. 314422.     8      18.4    12471
##  5 2012-09-01 11062. 314647.     8.2    18.8    12115
##  6 2012-10-01 11100. 314854.     8.8    19.9    12124
##  7 2012-11-01 11137. 315054.     9.7    18.6    12005
##  8 2012-12-01 11140. 315233.    12      17.7    12298
##  9 2013-01-01 11203. 315390.     6.3    15.8    12471
## 10 2013-02-01 11240. 315520.     5.8    17.2    11950
## # ℹ 26 more rows

Trực quan dữ liệu bằng biểu đồ đường

# Plot the monthly US personal saving rate (psavert) as a time series with a
# smoothed trend line, then turn the static chart into an interactive one.
obj <- ggplot(data = economics, mapping = aes(x = date, y = psavert)) +
  # `linewidth` replaces the `size` aesthetic for lines, which has been
  # deprecated since ggplot2 3.4.0.
  geom_line(color = "#038387", linewidth = 0.6) +
  # No method is given, so ggplot2 picks the smoother and reports its choice.
  geom_smooth() +
  # One tick every five years, labelled with the two-digit year. The argument
  # is spelled out as `labels` instead of relying on partial matching.
  scale_x_date(date_breaks = "5 years", labels = date_format("%y")) +
  labs(
    title = "Personal Saving Rate in USA from 1967 to 2015",
    subtitle = "5 years",
    x = "Time",
    y = "Personal Saving Rate"
  ) +
  theme_minimal()
ggplotly(obj)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# Download daily Apple quotes from Yahoo Finance as a data frame.
# auto.assign = FALSE makes getSymbols() return the data itself. With the
# default (TRUE) it returns only the string "AAPL" and creates the `AAPL`
# object as a hidden side effect, so assigning the result was misleading.
AAPL <- getSymbols("AAPL", return.class = "data.frame",
                   from = "2021-05-05", to = "2026-05-05",
                   auto.assign = FALSE)
# Inspect the most recent rows to confirm how far the download reaches.
tail(AAPL, n = 4)
##            AAPL.Open AAPL.High AAPL.Low AAPL.Close AAPL.Volume AAPL.Adjusted
## 2025-12-31    273.06    273.68   271.75     271.86    27293600        271.86
## 2026-01-02    272.26    277.84   269.00     271.01    37838100        271.01
## 2026-01-05    270.64    271.51   266.14     267.26    45647200        267.26
## 2026-01-06    267.00    267.55   262.12     262.36    52282100        262.36
# Reshape a raw quantmod OHLC data frame into a tidy table with the columns
# Date, Close and Company.
#
# raw      Data frame returned by getSymbols(); trading dates are stored in
#          its row names and the closing price in the "<TICKER>.Close" column.
# company  Label used for this series in the plot legend.
tidy_close <- function(raw, company) {
  close_col <- grep("\\.Close$", names(raw), value = TRUE)
  stopifnot(length(close_col) == 1L)
  data.frame(
    Date = as.Date(row.names(raw)),
    Close = raw[[close_col]],
    # rep() keeps the column length correct even for an empty download.
    Company = rep(company, nrow(raw))
  )
}

# `AAPL` is the raw Apple data frame downloaded earlier in the document.
apple <- tidy_close(AAPL, "Apple")

# Take data for Facebook (ticker META). auto.assign = FALSE makes
# getSymbols() return the data instead of just the ticker string.
META <- getSymbols("META", return.class = "data.frame",
                   from = "2021-05-05", to = "2026-05-05",
                   auto.assign = FALSE)
facebook <- tidy_close(META, "Facebook")

# Take data for VinFast; its window starts two years later than the others.
VFS <- getSymbols("VFS", return.class = "data.frame",
                  from = "2023-05-05", to = "2026-05-05",
                  auto.assign = FALSE)
vinfast <- tidy_close(VFS, "Vinfast")

# Combine data
data_series <- bind_rows(apple, facebook, vinfast)

# Build the subtitle from the data so that it always matches the period that
# was actually downloaded.
date_span <- format(range(data_series$Date), "%B %Y")

# Visualization
# NASDAQ = National Association of Securities Dealers Automated Quotation
# System
ggplot(data = data_series,
       mapping = aes(x = Date, y = Close, color = Company)) +
  # `linewidth` replaces the deprecated `size` aesthetic for lines.
  geom_line(linewidth = 1) +
  scale_x_date(date_breaks = "6 months", labels = date_format("%b-%Y")) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  # Anchor the axis at zero but leave the upper limit open: the former fixed
  # cap of 450 dropped every closing price above it from the plot.
  scale_y_continuous(limits = c(0, NA), breaks = breaks_width(100),
                     labels = dollar) +
  labs(
    title = "NASDAQ Closing Prices",
    subtitle = paste("From", date_span[1], "to", date_span[2]),
    caption = "source: Yahoo Finance",
    x = "",
    y = "Closing Price"
  ) +
  scale_color_brewer(palette = "Set1")