1 Nộp tiểu luận.

1.2 Mô tả dữ liệu

Data from “How do Mortgage Subsidies Affect Home Ownership? Evidence from the Mid-Century GI Bills”

Dữ liệu từ “Trợ cấp thế chấp ảnh hưởng đến quyền sở hữu nhà như thế nào? Bằng chứng từ các Đạo luật GI giữa thế kỷ”

  • Dữ liệu thế chấp bao gồm dữ liệu từ Fetter (2015) về tỷ lệ sở hữu nhà của nam giới, tập trung vào việc liệu họ có được sinh ra vào đúng thời điểm để đủ điều kiện nhận trợ cấp thế chấp dựa trên nghĩa vụ quân sự hay không.

  • Khung dữ liệu có 214144 hàng và 6 biến

    • bpl Birth State
    • qob Quarter of birth
    • nonwhite White/nonwhite race indicator. 1 = Nonwhite
    • vet_wwko Veteran of either the Korean war or World War II
    • home_ownership Owns a home
    • qob_minus_kw Quarter of birth centered on eligibility for mortgage subsidy (0+ = eligible)
library(causaldata)
## Warning: package 'causaldata' was built under R version 4.3.1
# Load the `mortgages` dataset into the workspace and preview its first rows.
data(list = "mortgages")
head(mortgages, n = 6L)
## # A tibble: 6 × 6
##   bpl         qob nonwhite vet_wwko home_ownership qob_minus_kw
##   <chr>     <dbl>    <dbl>    <dbl>          <dbl>        <dbl>
## 1 Colorado      1        0        1              1        -54.5
## 2 Texas         1        0        0              0        -54.5
## 3 Minnesota     1        0        1              1        -54.5
## 4 New York      1        0        1              1        -54.5
## 5 Illinois      1        0        1              0        -54.5
## 6 Virginia      1        0        1              1        -54.5

1.3 Xuất file dữ liệu từ R sang file CSV (mở được bằng Excel)

# Export the dataset to a CSV file in the working directory.
# row.names = FALSE: by default write.csv() also writes the row names as an
# extra, unnamed first column; that column is just a running index and would
# show up as a spurious variable when the file is opened or read back.
write.csv(mortgages, file = "mortgages_data_lptptien.csv", row.names = FALSE)

2 Phân tích dữ liệu

library(rvest) 
library("dplyr")
## Warning: package 'dplyr' was built under R version 4.3.1
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library("tidyverse")
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.4
## ✔ ggplot2   3.4.2     ✔ stringr   1.5.0
## ✔ lubridate 1.9.2     ✔ tibble    3.2.1
## ✔ purrr     1.0.1     ✔ tidyr     1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()         masks stats::filter()
## ✖ readr::guess_encoding() masks rvest::guess_encoding()
## ✖ dplyr::lag()            masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(quantmod)
## Warning: package 'quantmod' was built under R version 4.3.1
## Loading required package: xts
## Warning: package 'xts' was built under R version 4.3.1
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################
## 
## Attaching package: 'xts'
## 
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## 
## Loading required package: TTR
## Warning: package 'TTR' was built under R version 4.3.1
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(xts)
library(PerformanceAnalytics)
## Warning: package 'PerformanceAnalytics' was built under R version 4.3.1
## 
## Attaching package: 'PerformanceAnalytics'
## 
## The following object is masked from 'package:graphics':
## 
##     legend
library(rugarch)
## Warning: package 'rugarch' was built under R version 4.3.1
## Loading required package: parallel
## 
## Attaching package: 'rugarch'
## 
## The following object is masked from 'package:purrr':
## 
##     reduce
## 
## The following object is masked from 'package:stats':
## 
##     sigma
library(fGarch)
## Warning: package 'fGarch' was built under R version 4.3.1
## NOTE: Packages 'fBasics', 'timeDate', and 'timeSeries' are no longer
## attached to the search() path when 'fGarch' is attached.
## 
## If needed attach them yourself in your R script by e.g.,
##         require("timeSeries")
## 
## Attaching package: 'fGarch'
## 
## The following object is masked from 'package:TTR':
## 
##     volatility
library(tidyverse)
library(dplyr)
library(utf8)
library(tidyr)
library(graphics)
library(scales)
## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
library(ggplot2)

Đặt m là tên dữ liệu bản sao của mortgages

# Keep a working copy of the dataset under a short name, then display it.
m <- mortgages
m
## # A tibble: 214,144 × 6
##    bpl            qob nonwhite vet_wwko home_ownership qob_minus_kw
##    <chr>        <dbl>    <dbl>    <dbl>          <dbl>        <dbl>
##  1 Colorado         1        0        1              1        -54.5
##  2 Texas            1        0        0              0        -54.5
##  3 Minnesota        1        0        1              1        -54.5
##  4 New York         1        0        1              1        -54.5
##  5 Illinois         1        0        1              0        -54.5
##  6 Virginia         1        0        1              1        -54.5
##  7 New Jersey       1        0        1              0        -54.5
##  8 North Dakota     1        0        1              0        -54.5
##  9 Missouri         1        0        0              0        -54.5
## 10 Wisconsin        1        0        1              0        -54.5
## # ℹ 214,134 more rows

Truy vấn dữ liệu từ data mortgages

# NOTE(review): the next line renames the columns of `mortgages` itself, not of
# the copy `m`; the vectors below still read `m` by its original column names.
# The first new name is "bq" while the matching vector below is called `bp` -
# confirm which spelling is intended and whether this rename is needed at all.
names(mortgages) <- c("bq", "qo", "no", "ve", "ho", "qom")

# Pull each column of `m` out into a standalone vector with a short name.
bp <- m[["bpl"]]
qo <- m[["qob"]]
no <- m[["nonwhite"]]
ve <- m[["vet_wwko"]]
ho <- m[["home_ownership"]]
qom <- m[["qob_minus_kw"]]

Lập bảng tần số

  • Biến qo
# Turn the quarter-of-birth values into a labelled factor using the
# right-closed bins (0,1], (1,2], (2,3], (3,4], then count each quarter.
Bqo <- cut(qo, breaks = 0:4, labels = paste("Qúy", 1:4), right = TRUE)
Bangqo <- table(Bqo)
Bangqo
## Bqo
## Qúy 1 Qúy 2 Qúy 3 Qúy 4 
## 54186 52460 56093 51405
  • Biến ho
# Frequency table of the home-ownership indicator.
Bangho <- table(ho)
Bangho
## ho
##      0      1 
## 128435  85709
  • Biến qom
# Bin qom into 8 intervals chosen by cut() and count the observations in each.
table(cut(qom, breaks = 8))
## 
## (-54.6,-44.1] (-44.1,-33.8] (-33.8,-23.4]   (-23.4,-13]   (-13,-2.62] 
##         30691         27525         29617         26230         24614 
##  (-2.62,7.75]   (7.75,18.1]   (18.1,28.6] 
##         25825         23498         26144
  • Biến no
# Frequency table of the nonwhite indicator.
Bangno <- table(no)
Bangno
## no
##      0      1 
## 192048  22096

Lập bảng tần số cho 2 biến

# Two-way frequency table: quarter of birth (rows) by home ownership (columns).
# NOTE(review): the object name suggests qo x no, but the table is built from
# qo and ho - confirm which pair of variables is intended.
tabqono <- table(qo = qo, ho = ho)
tabqono
##    ho
## qo      0     1
##   1 31641 22545
##   2 31378 21082
##   3 34099 21994
##   4 31317 20088

2.1 Vẽ đồ thị

2.1.1 Vẽ đồ thị trực quan hóa

# Bar chart of the quarter-of-birth frequencies.
# The bars (x axis) are the quarters and the bar heights (y axis) are counts,
# so the quarter label belongs on the x axis; the original put it on the y
# axis and left the x axis blank.
barplot(
  Bangqo,
  xlab = "Qúy",
  ylab = "Tần số",
  main = "Biểu đồ thể hiện dữ liệu của biến Qúy sinh"
)

# Stacked bar chart of the qo-by-ho table: one bar per ho value (the table's
# columns), with one stacked segment per quarter (the table's rows).
# Fixes relative to the original call:
#   - the title named `no`, but `tabqono` is built from qo and ho;
#   - the y axis shows counts, not quarters, so it is labelled as frequency;
#   - a legend is added so the stacked quarter segments can be told apart.
barplot(
  tabqono,
  xlab = "Sở hữu nhà (ho)",
  ylab = "Tần số",
  main = "Biểu đồ kết hợp giữa biến qo và ho",
  legend.text = paste("Qúy", rownames(tabqono))
)

3 Sắp xếp dữ liệu

  • Sắp xếp theo thứ tự các quý tăng dần từ quý 1 đến quý 4
# Reorder the rows of `m` by ascending quarter of birth and preview the result.
Quytang <- m[order(m[["qob"]]), ]
head(Quytang, n = 6L)
## # A tibble: 6 × 6
##   bpl         qob nonwhite vet_wwko home_ownership qob_minus_kw
##   <chr>     <dbl>    <dbl>    <dbl>          <dbl>        <dbl>
## 1 Colorado      1        0        1              1        -54.5
## 2 Texas         1        0        0              0        -54.5
## 3 Minnesota     1        0        1              1        -54.5
## 4 New York      1        0        1              1        -54.5
## 5 Illinois      1        0        1              0        -54.5
## 6 Virginia      1        0        1              1        -54.5