ACS

Author

Maria A. Nix Castro

ACS Housing Data

Quarto

Quarto enables you to weave together content and executable code into a finished document. To learn more about Quarto see https://quarto.org.

Running Code

When you click the Render button a document will be generated that includes both content and the output of embedded code. You can embed code like this:

1 + 1
[1] 2

You can add options to executable code like this

[1] 4
# keep adding your libraries to this cell block as you need them
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.2     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   4.0.1     ✔ tibble    3.2.1
✔ lubridate 1.9.2     ✔ tidyr     1.3.0
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
ACS_2022_Housing <- read_csv("ACS-2022-Housing.csv")
Rows: 21008 Columns: 23
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (2): RT, SERIALNO
dbl (21): NP, BDSP, BLD, FS, HFL, MRGP, RNTP, VALP, VEH, YRBLT, CPLT, FINCP,...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#View(ACS_2022_Housing)
head(ACS_2022_Housing)
# A tibble: 6 × 23
  RT    SERIALNO        NP  BDSP   BLD    FS   HFL  MRGP  RNTP  VALP   VEH YRBLT
  <chr> <chr>        <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 H     2022GQ00001…     1    NA    NA     2    NA    NA    NA    NA    NA    NA
2 H     2022GQ00002…     1    NA    NA     2    NA    NA    NA    NA    NA    NA
3 H     2022GQ00003…     1    NA    NA     2    NA    NA    NA    NA    NA    NA
4 H     2022GQ00004…     1    NA    NA     1    NA    NA    NA    NA    NA    NA
5 H     2022GQ00005…     1    NA    NA     2    NA    NA    NA    NA    NA    NA
6 H     2022GQ00005…     1    NA    NA     2    NA    NA    NA    NA    NA    NA
# ℹ 11 more variables: CPLT <dbl>, FINCP <dbl>, FPARC <dbl>, HHL <dbl>,
#   HHLDRAGEP <dbl>, HHLDRRAC1P <dbl>, HHT2 <dbl>, KIT <dbl>, NPF <dbl>,
#   PLM <dbl>, TAXAMT <dbl>
# this will find the number of rows in the data frame
nrow(ACS_2022_Housing)
[1] 21008
# this will find the count and percent of each race category
ACS_2022_Housing %>%
  count(HHLDRRAC1P) %>%
  mutate(percent = 100 * n / sum(n))
# A tibble: 10 × 3
   HHLDRRAC1P     n percent
        <dbl> <int>   <dbl>
 1          1 14567 69.3   
 2          2   204  0.971 
 3          3   205  0.976 
 4          4     5  0.0238
 5          5    29  0.138 
 6          6   677  3.22  
 7          7    42  0.200 
 8          8   485  2.31  
 9          9  1432  6.82  
10         NA  3362 16.0   
5/21008
[1] 0.0002380046
summary(ACS_2022_Housing$FINCP)      # Family income
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
  -6600   54200   93000  122126  149700 1479600    9987 
summary(ACS_2022_Housing$VALP)       # Property value
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
   1000  300000  450000  526143  650000 3797000    8668 
summary(ACS_2022_Housing$RNTP)       # Monthly rent
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
      4     870    1200    1315    1600    4800   15749 
summary(ACS_2022_Housing$TAXAMT)     # Property tax
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
      0    2050    3450    4097    5250   23500    8732 
# Sample standard deviation for property tax
sd(ACS_2022_Housing$TAXAMT, na.rm = TRUE)
[1] 3278.774
# Interquartile range for monthly rent
IQR(ACS_2022_Housing$RNTP, na.rm = TRUE)
[1] 730
ACS_2022_Housing %>% filter(FINCP > 80000)
# A tibble: 6,337 × 23
   RT    SERIALNO      NP  BDSP   BLD    FS   HFL  MRGP  RNTP   VALP   VEH YRBLT
   <chr> <chr>      <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl>
 1 H     2022HU000…     5     4     2     1     1     4    NA 550000     6  1960
 2 H     2022HU000…     3     4     2     2     6  2500    NA 750000     5  1950
 3 H     2022HU000…     2     3     2     2     1  2500    NA 735000     2  1980
 4 H     2022HU000…     2     2     2     2     8   600    NA 240000     2  1940
 5 H     2022HU000…     4     3     2     2     1    NA    NA 530000     3  1990
 6 H     2022HU000…     3     2     4     2     3     4  3000     NA     1  1960
 7 H     2022HU000…     2     2     2     2     3  1400    NA 320000     1  1970
 8 H     2022HU000…     3     2     3     2     1     4   600     NA     2  1990
 9 H     2022HU000…     4     4     2     2     3  2700    NA 700000     2  1940
10 H     2022HU000…     4     3     2     2     1    NA    NA 400000     2  1990
# ℹ 6,327 more rows
# ℹ 11 more variables: CPLT <dbl>, FINCP <dbl>, FPARC <dbl>, HHL <dbl>,
#   HHLDRAGEP <dbl>, HHLDRRAC1P <dbl>, HHT2 <dbl>, KIT <dbl>, NPF <dbl>,
#   PLM <dbl>, TAXAMT <dbl>
ACS_2022_Housing %>% filter(CPLT == 2)
# A tibble: 144 × 23
   RT    SERIALNO     NP  BDSP   BLD    FS   HFL  MRGP  RNTP    VALP   VEH YRBLT
   <chr> <chr>     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl> <dbl> <dbl>
 1 H     2022HU00…     2     3     2     2     1  2500    NA  735000     2  1980
 2 H     2022HU00…     2     2     1     2     3     4    NA  300000     2  2000
 3 H     2022HU00…     2     3     2     2     1  1900    NA  360000     2  2000
 4 H     2022HU00…     3     5     2     2     1  4100    NA 1000000     2  1939
 5 H     2022HU00…     3     4     2     2     3     4   880      NA     1  1970
 6 H     2022HU00…     2     7     2     2     1  5600    NA  950000     2  2000
 7 H     2022HU00…     2     4     2     2     1  2000    NA  520000     2  2000
 8 H     2022HU00…     2     3     2     2     1     4    NA  750000     2  2010
 9 H     2022HU01…     2     3     2     1     3   790    NA   98000     1  1970
10 H     2022HU01…     5     4     2     1     3   980    NA  750000     1  1939
# ℹ 134 more rows
# ℹ 11 more variables: CPLT <dbl>, FINCP <dbl>, FPARC <dbl>, HHL <dbl>,
#   HHLDRAGEP <dbl>, HHLDRRAC1P <dbl>, HHT2 <dbl>, KIT <dbl>, NPF <dbl>,
#   PLM <dbl>, TAXAMT <dbl>
ACS_2022_Housing %>% filter(FINCP > 80000 & CPLT == 2) #both
# A tibble: 106 × 23
   RT    SERIALNO      NP  BDSP   BLD    FS   HFL  MRGP  RNTP   VALP   VEH YRBLT
   <chr> <chr>      <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl>
 1 H     2022HU000…     2     3     2     2     1  2500    NA 7.35e5     2  1980
 2 H     2022HU002…     2     3     2     2     1  1900    NA 3.60e5     2  2000
 3 H     2022HU003…     3     5     2     2     1  4100    NA 1   e6     2  1939
 4 H     2022HU005…     2     7     2     2     1  5600    NA 9.5 e5     2  2000
 5 H     2022HU006…     2     4     2     2     1  2000    NA 5.20e5     2  2000
 6 H     2022HU008…     2     3     2     2     1     4    NA 7.5 e5     2  2010
 7 H     2022HU010…     2     3     4     2     3  1600    NA 3.75e5     2  1980
 8 H     2022HU011…     2     3     2     2     1  1800    NA 3.7 e5     2  1940
 9 H     2022HU017…     2     3     2     2     3  1600    NA 3.25e5     2  2010
10 H     2022HU017…     2     3     2     2     3  1600    NA 5   e5     2  1939
# ℹ 96 more rows
# ℹ 11 more variables: CPLT <dbl>, FINCP <dbl>, FPARC <dbl>, HHL <dbl>,
#   HHLDRAGEP <dbl>, HHLDRRAC1P <dbl>, HHT2 <dbl>, KIT <dbl>, NPF <dbl>,
#   PLM <dbl>, TAXAMT <dbl>
ACS_2022_Housing %>% filter(FINCP > 80000 | CPLT == 2) #either
# A tibble: 6,375 × 23
   RT    SERIALNO      NP  BDSP   BLD    FS   HFL  MRGP  RNTP   VALP   VEH YRBLT
   <chr> <chr>      <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl>
 1 H     2022HU000…     5     4     2     1     1     4    NA 550000     6  1960
 2 H     2022HU000…     3     4     2     2     6  2500    NA 750000     5  1950
 3 H     2022HU000…     2     3     2     2     1  2500    NA 735000     2  1980
 4 H     2022HU000…     2     2     2     2     8   600    NA 240000     2  1940
 5 H     2022HU000…     4     3     2     2     1    NA    NA 530000     3  1990
 6 H     2022HU000…     3     2     4     2     3     4  3000     NA     1  1960
 7 H     2022HU000…     2     2     2     2     3  1400    NA 320000     1  1970
 8 H     2022HU000…     3     2     3     2     1     4   600     NA     2  1990
 9 H     2022HU000…     4     4     2     2     3  2700    NA 700000     2  1940
10 H     2022HU000…     4     3     2     2     1    NA    NA 400000     2  1990
# ℹ 6,365 more rows
# ℹ 11 more variables: CPLT <dbl>, FINCP <dbl>, FPARC <dbl>, HHL <dbl>,
#   HHLDRAGEP <dbl>, HHLDRRAC1P <dbl>, HHT2 <dbl>, KIT <dbl>, NPF <dbl>,
#   PLM <dbl>, TAXAMT <dbl>
ACS_2022_Housing %>% group_by(CPLT) %>% summarize(n = n())
# A tibble: 5 × 2
   CPLT     n
  <dbl> <int>
1     1  8654
2     2   144
3     3  1364
4     4    94
5    NA 10752
ACS_2022_Housing %>% group_by(CPLT) %>% summarize(avg_inc = mean(FINCP), na.rm = TRUE)
# A tibble: 5 × 3
   CPLT avg_inc na.rm
  <dbl>   <dbl> <lgl>
1     1 134963. TRUE 
2     2 147825. TRUE 
3     3     NA  TRUE 
4     4     NA  TRUE 
5    NA     NA  TRUE 

Lesson 3.5

# add library(pivottabler) at the beginning of your Quarto document