This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Clear the workspace
# NOTE(review): rm(list = ls()) removes the objects listed by ls() only; it
# does not detach packages, so dplyr (attached above) stays loaded.
rm(list = ls()) # Clear environment
# gc() triggers a garbage collection and prints the memory-usage table below.
gc() # Clear unused memory / Take out the trash
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 687236 36.8 1308247 69.9 1308247 69.9
## Vcells 1237126 9.5 8388608 64.0 2004036 15.3
# Emit a form feed; front ends that honour it clear the console.
cat("\f")
# Close the current graphics device, but only when at least one is open.
if (!is.null(dev.list())) {
  dev.off()
}
## null device
## 1
library(readxl)
# Read the raw workbook and convert the result of read_excel() into a plain
# data.frame.
df <- read_excel("Raw Data NEW 02162024.xlsx") %>%
  as.data.frame()
# Compact preview: one line per column with its type and first values.
glimpse(df)
## Rows: 1,138
## Columns: 49
## $ `Number Of Units` <dbl> 25, 8, 18, 17, 13, 125, 2…
## $ `Year Built` <dbl> 1984, 1950, 1990, 2007, 1…
## $ `Vacancy %` <dbl> 7.04, 10.26, 6.31, 4.61, …
## $ `Land Area (SF)` <dbl> 108900, 277477, 67965, 17…
## $ `Last Sale Date` <dttm> 2022-04-20, 2022-05-13, …
## $ `Last Sale Price` <dbl> 3100000, 800000, 1000000,…
## $ `House Maint & Repair 2023 Cons Spdng $(1m)` <dbl> 2315630, 178754, 7470769,…
## $ `Household Operations 2023 Cons Spdng $(1m)` <dbl> 2875112, 197441, 8760263,…
## $ `% HH Grwth 2010-2023(1m)` <dbl> 1.17, 46.67, 2.66, 3.29, …
## $ `% HH Grwth 2023-2028(1m)` <dbl> 9.88, 7.91, -1.30, -0.64,…
## $ `2023 Households(1m)` <dbl> 2571, 139, 8107, 156, 21,…
## $ `2023 Med HH Size(1m)` <dbl> 2, 3, 2, 2, 2, 2, 2, 2, 2…
## $ `2023 Avg HH Size(1m)` <dbl> 2.3, 3.0, 2.0, 2.1, 2.2, …
## $ `% HU Grwth 2010-2023(1m)` <dbl> 8.03, 51.09, 5.03, 10.64,…
## $ `2023 Avg HU Size(1m)` <dbl> 4, 2, 9, 1, 1, 7, 9, 2, 1…
## $ `2023 Avg HU Value(1m)` <dbl> 215342, 225340, 240448, 1…
## $ `2023 Avg Yr Built(1m)` <dbl> 1971, 2003, 1974, 1984, 1…
## $ `2023 Group Quarters(1m)` <dbl> 491, 0, 130, 0, 0, 62, 20…
## $ `2023 Home Blt 1940-1949(1m)` <dbl> 849, 1, 763, 7, 4, 583, 3…
## $ `2023 Home Blt 1950-1959(1m)` <dbl> 381, 2, 1500, 4, 1, 2772,…
## $ `2023 Home Blt 1960-1969(1m)` <dbl> 277, 2, 1854, 7, 3, 1745,…
## $ `2023 Home Blt 1970-1979(1m)` <dbl> 247, 8, 3048, 83, 5, 1907…
## $ `2023 Home Blt 1980-1989(1m)` <dbl> 310, 8, 1360, 52, 6, 786,…
## $ `2023 Home Blt 1990-1999(1m)` <dbl> 284, 12, 268, 40, 8, 414,…
## $ `2023 Home Blt 2000-2010(1m)` <dbl> 372, 72, 917, 21, 10, 56,…
## $ `2023 Home Blt 2010+(1m)` <dbl> 145, 49, 283, 8, 0, 1220,…
## $ `2023 HU 1 Unit(1m)` <dbl> 1985, 120, 3791, 77, 16, …
## $ `2023 HU 20+ Units(1m)` <dbl> 88, 3, 1353, 0, 0, 1658, …
## $ `2023 HU 2-4 Units(1m)` <dbl> 127, 0, 926, 0, 0, 458, 4…
## $ `2023 HU 5-19 Units(1m)` <dbl> 432, 5, 1522, 0, 0, 546, …
## $ `2023 Med Yr Built(1m)` <dbl> 1966, 2006, 1972, 1981, 1…
## $ `2023 Owner Occ'd Housing(1m)` <dbl> 1343, 103, 4289, 124, 18,…
## $ `2023 Renter Occ'd Housing(1m)` <dbl> 1229, 36, 3818, 32, 3, 20…
## $ `2023 Home Value $1,000,000+(1m)` <dbl> 3, 0, 106, 0, 4, 15, 117,…
## $ `2023 Home Value $100,000-200,000(1m)` <dbl> 514, 28, 919, 29, 4, 1958…
## $ `2023 Home Value $200,000-300,000(1m)` <dbl> 341, 56, 671, 3, 2, 1352,…
## $ `2023 Home Value $300,000-400,000(1m)` <dbl> 106, 12, 579, 2, 2, 730, …
## $ `2023 Home Value $400,000-500,000(1m)` <dbl> 105, 1, 80, 0, 0, 355, 15…
## $ `2023 Home Value $500,000-1,000,000(1m)` <dbl> 33, 0, 361, 12, 0, 324, 6…
## $ `2023 Home Value <100,000(1m)` <dbl> 242, 6, 1573, 78, 5, 1575…
## $ `2023 Median Home Value(1m)` <dbl> 183657, 231250, 162187, 7…
## $ `% Pop Grwth 2010-2023(1m)` <dbl> 9.44, 51.61, 4.49, 10.56,…
## $ `% Pop Grwth 2023-2028(1m)` <dbl> 10.34, 8.04, -1.39, 0.00,…
## $ `2023 Population(1m)` <dbl> 6479, 423, 16666, 335, 51…
## $ `2023 Pop Wrk Trav Time <30 Min(1m)` <dbl> 1882, 104, 5237, 79, 11, …
## $ `2023 Pop Wrk Trav Time 30-60 Min(1m)` <dbl> 476, 93, 2031, 49, 2, 183…
## $ `2023 Pop Wrk Trav Time 60+ Min(1m)` <dbl> 202, 21, 377, 4, 2, 234, …
## $ `County Name` <chr> "St. Johns", "Duval", "Pi…
## $ City <chr> "Saint Augustine", "Jacks…
library(dplyr)
# Replace the spaces in the two building-level column names with underscores.
df <- rename(
  df,
  Number_Of_Units = `Number Of Units`,
  Year_Built = `Year Built`
)
# Standardize every column name in one vectorized step: lower-case it and
# replace spaces with underscores. Assigning names(df) once avoids calling
# dplyr::rename() (and copying the data frame) once per column.
# Only spaces are replaced, so names containing other special characters
# (e.g. `vacancy_%`, `land_area_(sf)`) stay non-syntactic and still need
# backticks in formulas and aes().
original_variable_names <- names(df) # column names before standardization
new_variable_names <- tolower(gsub(" ", "_", original_variable_names, fixed = TRUE))
# Fail loudly if two columns would collapse onto the same cleaned name.
stopifnot(anyDuplicated(new_variable_names) == 0L)
names(df) <- new_variable_names
# Print the resulting data frame
#print(df)
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
# Summary-statistics table for the numeric columns of df. Because the type
# argument is left commented out, stargazer emits LaTeX source (as in the
# output below); pass type = "text" for a plain-text table instead.
stargazer::stargazer(
  df # , type = "text"
)
##
## % Table created by stargazer v.5.2.3 by Marek Hlavac, Social Policy Institute. E-mail: marek.hlavac at gmail.com
## % Date and time: Tue, Mar 19, 2024 - 20:56:49
## \begin{table}[!htbp] \centering
## \caption{}
## \label{}
## \begin{tabular}{@{\extracolsep{5pt}}lccccc}
## \\[-1.8ex]\hline
## \hline \\[-1.8ex]
## Statistic & \multicolumn{1}{c}{N} & \multicolumn{1}{c}{Mean} & \multicolumn{1}{c}{St. Dev.} & \multicolumn{1}{c}{Min} & \multicolumn{1}{c}{Max} \\
## \hline \\[-1.8ex]
## number\_of\_units & 1,138 & 119.289 & 158.890 & 1 & 1,519 \\
## year\_built & 1,059 & 1,970.433 & 18.469 & 1,900 & 2,022 \\
## vacancy\_\% & 1,122 & 6.209 & 5.589 & 0.060 & 100.000 \\
## land\_area\_(sf) & 1,129 & 874,976.600 & 1,868,040.000 & 0 & 25,501,431 \\
## last\_sale\_price & 1,138 & 6,119,103.000 & 15,585,241.000 & 2,600 & 363,125,000 \\
## house\_maint\_&\_repair\_2023\_cons\_spdng\_\$(1m) & 1,137 & 3,201,500.000 & 2,408,647.000 & 0 & 12,607,149 \\
## household\_operations\_2023\_cons\_spdng\_\$(1m) & 1,137 & 3,457,632.000 & 2,645,364.000 & 0 & 18,560,824 \\
## \%\_hh\_grwth\_2010-2023(1m) & 1,137 & 12.449 & 17.914 & $-$24.460 & 157.930 \\
## \%\_hh\_grwth\_2023-2028(1m) & 1,137 & 2.776 & 3.856 & $-$10.790 & 17.240 \\
## 2023\_households(1m) & 1,137 & 3,034.502 & 2,401.237 & 0 & 12,942 \\
## 2023\_med\_hh\_size(1m) & 1,137 & 2.099 & 0.326 & 0 & 4 \\
## 2023\_avg\_hh\_size(1m) & 1,137 & 2.414 & 0.362 & 0.000 & 4.100 \\
## \%\_hu\_grwth\_2010-2023(1m) & 1,137 & 17.446 & 23.320 & $-$49.330 & 188.330 \\
## 2023\_avg\_hu\_size(1m) & 1,137 & 4.586 & 3.583 & 0 & 19 \\
## 2023\_avg\_hu\_value(1m) & 1,137 & 227,786.600 & 96,678.880 & 0 & 764,965 \\
## 2023\_avg\_yr\_built(1m) & 1,137 & 1,980.332 & 83.623 & 0 & 2,008 \\
## 2023\_group\_quarters(1m) & 1,137 & 104.622 & 233.940 & 0 & 3,287 \\
## 2023\_home\_blt\_1940-1949(1m) & 1,137 & 169.369 & 309.808 & 0 & 4,433 \\
## 2023\_home\_blt\_1950-1959(1m) & 1,137 & 345.573 & 512.421 & 0 & 3,297 \\
## 2023\_home\_blt\_1960-1969(1m) & 1,137 & 489.467 & 599.101 & 0 & 4,157 \\
## 2023\_home\_blt\_1970-1979(1m) & 1,137 & 770.105 & 826.316 & 0 & 5,872 \\
## 2023\_home\_blt\_1980-1989(1m) & 1,137 & 688.462 & 619.666 & 0 & 4,214 \\
## 2023\_home\_blt\_1990-1999(1m) & 1,137 & 391.283 & 359.370 & 0 & 3,347 \\
## 2023\_home\_blt\_2000-2010(1m) & 1,137 & 390.953 & 409.312 & 0 & 3,539 \\
## 2023\_home\_blt\_2010+(1m) & 1,137 & 335.470 & 407.243 & 0 & 2,570 \\
## 2023\_hu\_1\_unit(1m) & 1,137 & 1,732.252 & 1,280.847 & 0 & 8,026 \\
## 2023\_hu\_20+\_units(1m) & 1,137 & 386.996 & 724.563 & 0 & 7,773 \\
## 2023\_hu\_2-4\_units(1m) & 1,137 & 293.786 & 391.613 & 0 & 2,743 \\
## 2023\_hu\_5-19\_units(1m) & 1,137 & 420.390 & 621.173 & 0 & 4,326 \\
## 2023\_med\_yr\_built(1m) & 1,137 & 1,979.392 & 83.813 & 0 & 2,010 \\
## 2023\_owner\_occ'd\_housing(1m) & 1,137 & 1,860.248 & 1,393.002 & 0 & 7,443 \\
## 2023\_renter\_occ'd\_housing(1m) & 1,137 & 1,174.253 & 1,233.330 & 0 & 8,801 \\
## 2023\_home\_value\_\$1,000,000+(1m) & 1,137 & 24.793 & 60.630 & 0 & 551 \\
## 2023\_home\_value\_\$100,000-200,000(1m) & 1,137 & 531.785 & 500.642 & 0 & 3,138 \\
## 2023\_home\_value\_\$200,000-300,000(1m) & 1,137 & 409.989 & 379.426 & 0 & 2,179 \\
## 2023\_home\_value\_\$300,000-400,000(1m) & 1,137 & 198.217 & 237.679 & 0 & 1,649 \\
## 2023\_home\_value\_\$400,000-500,000(1m) & 1,137 & 75.173 & 119.113 & 0 & 1,179 \\
## 2023\_home\_value\_\$500,000-1,000,000(1m) & 1,137 & 100.508 & 144.795 & 0 & 1,829 \\
## 2023\_home\_value\_\textless 100,000(1m) & 1,137 & 519.798 & 585.554 & 0 & 4,566 \\
## 2023\_median\_home\_value(1m) & 1,137 & 188,513.000 & 92,171.900 & 0 & 767,973 \\
## \%\_pop\_grwth\_2010-2023(1m) & 1,137 & 17.372 & 22.500 & $-$48.460 & 189.240 \\
## \%\_pop\_grwth\_2023-2028(1m) & 1,137 & 2.804 & 3.809 & $-$8.960 & 17.130 \\
## 2023\_population(1m) & 1,137 & 7,401.467 & 5,884.947 & 0 & 32,505 \\
## 2023\_pop\_wrk\_trav\_time\_\textless 30\_min(1m) & 1,137 & 2,057.947 & 1,764.772 & 0 & 8,511 \\
## 2023\_pop\_wrk\_trav\_time\_30-60\_min(1m) & 1,137 & 988.638 & 1,018.898 & 0 & 7,965 \\
## 2023\_pop\_wrk\_trav\_time\_60+\_min(1m) & 1,137 & 245.870 & 287.865 & 0 & 2,940 \\
## \hline \\[-1.8ex]
## \end{tabular}
## \end{table}
library(ggcorrplot)
## Loading required package: ggplot2
?ggcorrplot()
?cor
?cor_pmat # cor_pmat(): Compute a correlation matrix p-values.
# Correlation inputs: all numeric columns, i.e. everything except
# last_sale_date (5), county_name (48) and city (49).
corr_data <- df[, c(1:4, 6:47)]
# Spearman rank correlations, using every complete pair of observations for
# each pair of variables.
mycorr <- cor(x = corr_data, use = "pairwise.complete.obs", method = "spearman")
# The p-values must come from the same test as the coefficients. cor_pmat()
# forwards extra arguments to stats::cor.test(), whose default method is
# Pearson, so Spearman is requested explicitly. exact = FALSE asks for the
# asymptotic p-value, because exact Spearman p-values cannot be computed when
# the data contain ties.
p.mat <- ggcorrplot::cor_pmat(x = corr_data, method = "spearman", exact = FALSE)
# head(p.mat)
library(ggcorrplot)
# Correlation heat map: lower triangle only, variables reordered by
# hierarchical clustering, coefficients printed on each tile.
myplot <- ggcorrplot(
  corr = mycorr,
  p.mat = p.mat,       # p-values used to flag insignificant coefficients
  method = "square",   # tile shape ("square" or "circle")
  type = "lower",      # show the lower triangle only
  hc.order = TRUE,     # order variables with hclust
  title = "Correlation Plot",
  colors = c("red", "white", "green"), # low, mid, high correlation
  lab = TRUE,          # print the coefficient on each tile
  lab_size = 2,        # size of those coefficient labels
  digits = 2,
  insig = "pch",       # mark (rather than blank) insignificant tiles
  pch = 4,             # plotting symbol used for that mark
  tl.cex = 8,          # axis label text size
  tl.col = "black"     # axis label text colour
)
myplot
# Explore data
str(df)
## 'data.frame': 1138 obs. of 49 variables:
## $ number_of_units : num 25 8 18 17 13 125 236 22 85 5 ...
## $ year_built : num 1984 1950 1990 2007 1977 ...
## $ vacancy_% : num 7.04 10.26 6.31 4.61 6.29 ...
## $ land_area_(sf) : num 108900 277477 67965 174240 348480 ...
## $ last_sale_date : POSIXct, format: "2022-04-20" "2022-05-13" ...
## $ last_sale_price : num 3100000 800000 1000000 600000 750000 ...
## $ house_maint_&_repair_2023_cons_spdng_$(1m): num 2315630 178754 7470769 180919 26145 ...
## $ household_operations_2023_cons_spdng_$(1m): num 2875112 197441 8760263 158527 21370 ...
## $ %_hh_grwth_2010-2023(1m) : num 1.17 46.67 2.66 3.29 0 ...
## $ %_hh_grwth_2023-2028(1m) : num 9.88 7.91 -1.3 -0.64 0 ...
## $ 2023_households(1m) : num 2571 139 8107 156 21 ...
## $ 2023_med_hh_size(1m) : num 2 3 2 2 2 2 2 2 2 2 ...
## $ 2023_avg_hh_size(1m) : num 2.3 3 2 2.1 2.2 2.1 2.1 2.9 1.8 2.5 ...
## $ %_hu_grwth_2010-2023(1m) : num 8.03 51.09 5.03 10.64 -25 ...
## $ 2023_avg_hu_size(1m) : num 4 2 9 1 1 7 9 2 17 3 ...
## $ 2023_avg_hu_value(1m) : num 215342 225340 240448 157097 394118 ...
## $ 2023_avg_yr_built(1m) : num 1971 2003 1974 1984 1984 ...
## $ 2023_group_quarters(1m) : num 491 0 130 0 0 62 209 102 21 75 ...
## $ 2023_home_blt_1940-1949(1m) : num 849 1 763 7 4 583 363 12 77 124 ...
## $ 2023_home_blt_1950-1959(1m) : num 381 2 1500 4 1 ...
## $ 2023_home_blt_1960-1969(1m) : num 277 2 1854 7 3 ...
## $ 2023_home_blt_1970-1979(1m) : num 247 8 3048 83 5 ...
## $ 2023_home_blt_1980-1989(1m) : num 310 8 1360 52 6 ...
## $ 2023_home_blt_1990-1999(1m) : num 284 12 268 40 8 ...
## $ 2023_home_blt_2000-2010(1m) : num 372 72 917 21 10 56 135 53 70 918 ...
## $ 2023_home_blt_2010+(1m) : num 145 49 283 8 0 ...
## $ 2023_hu_1_unit(1m) : num 1985 120 3791 77 16 ...
## $ 2023_hu_20+_units(1m) : num 88 3 1353 0 0 ...
## $ 2023_hu_2-4_units(1m) : num 127 0 926 0 0 458 435 23 252 33 ...
## $ 2023_hu_5-19_units(1m) : num 432 5 1522 0 0 ...
## $ 2023_med_yr_built(1m) : num 1966 2006 1972 1981 1988 ...
## $ 2023_owner_occ'd_housing(1m) : num 1343 103 4289 124 18 ...
## $ 2023_renter_occ'd_housing(1m) : num 1229 36 3818 32 3 ...
## $ 2023_home_value_$1,000,000+(1m) : num 3 0 106 0 4 15 117 0 28 8 ...
## $ 2023_home_value_$100,000-200,000(1m) : num 514 28 919 29 4 ...
## $ 2023_home_value_$200,000-300,000(1m) : num 341 56 671 3 2 ...
## $ 2023_home_value_$300,000-400,000(1m) : num 106 12 579 2 2 730 543 4 322 151 ...
## $ 2023_home_value_$400,000-500,000(1m) : num 105 1 80 0 0 355 159 1 296 21 ...
## $ 2023_home_value_$500,000-1,000,000(1m) : num 33 0 361 12 0 324 624 0 790 99 ...
## $ 2023_home_value_<100,000(1m) : num 242 6 1573 78 5 ...
## $ 2023_median_home_value(1m) : num 183657 231250 162187 79486 187499 ...
## $ %_pop_grwth_2010-2023(1m) : num 9.44 51.61 4.49 10.56 -26.09 ...
## $ %_pop_grwth_2023-2028(1m) : num 10.34 8.04 -1.39 0 -1.96 ...
## $ 2023_population(1m) : num 6479 423 16666 335 51 ...
## $ 2023_pop_wrk_trav_time_<30_min(1m) : num 1882 104 5237 79 11 ...
## $ 2023_pop_wrk_trav_time_30-60_min(1m) : num 476 93 2031 49 2 ...
## $ 2023_pop_wrk_trav_time_60+_min(1m) : num 202 21 377 4 2 234 274 38 145 136 ...
## $ county_name : chr "St. Johns" "Duval" "Pinellas" "Okeechobee" ...
## $ city : chr "Saint Augustine" "Jacksonville" "Largo" "Okeechobee" ...
summary(df)
## number_of_units year_built vacancy_% land_area_(sf)
## Min. : 1.0 Min. :1900 Min. : 0.060 Min. : 0
## 1st Qu.: 22.0 1st Qu.:1958 1st Qu.: 4.650 1st Qu.: 118699
## Median : 60.0 Median :1971 Median : 5.680 Median : 302624
## Mean : 119.3 Mean :1970 Mean : 6.209 Mean : 874977
## 3rd Qu.: 150.0 3rd Qu.:1983 3rd Qu.: 6.680 3rd Qu.: 899078
## Max. :1519.0 Max. :2022 Max. :100.000 Max. :25501431
## NA's :79 NA's :16 NA's :9
## last_sale_date last_sale_price
## Min. :1995-05-26 00:00:00.00 Min. : 2600
## 1st Qu.:2012-01-26 00:00:00.00 1st Qu.: 740000
## Median :2018-04-29 12:00:00.00 Median : 1800000
## Mean :2015-10-06 07:53:15.07 Mean : 6119103
## 3rd Qu.:2021-05-23 06:00:00.00 3rd Qu.: 5918750
## Max. :2024-02-06 00:00:00.00 Max. :363125000
##
## house_maint_&_repair_2023_cons_spdng_$(1m)
## Min. : 0
## 1st Qu.: 1311019
## Median : 2719773
## Mean : 3201500
## 3rd Qu.: 4484145
## Max. :12607149
## NA's :1
## household_operations_2023_cons_spdng_$(1m) %_hh_grwth_2010-2023(1m)
## Min. : 0 Min. :-24.46
## 1st Qu.: 1221935 1st Qu.: 3.12
## Median : 2905937 Median : 7.86
## Mean : 3457632 Mean : 12.45
## 3rd Qu.: 5158881 3rd Qu.: 14.95
## Max. :18560824 Max. :157.93
## NA's :1 NA's :1
## %_hh_grwth_2023-2028(1m) 2023_households(1m) 2023_med_hh_size(1m)
## Min. :-10.790 Min. : 0 Min. :0.000
## 1st Qu.: 0.000 1st Qu.: 1152 1st Qu.:2.000
## Median : 2.580 Median : 2403 Median :2.000
## Mean : 2.776 Mean : 3035 Mean :2.099
## 3rd Qu.: 5.430 3rd Qu.: 4637 3rd Qu.:2.000
## Max. : 17.240 Max. :12942 Max. :4.000
## NA's :1 NA's :1 NA's :1
## 2023_avg_hh_size(1m) %_hu_grwth_2010-2023(1m) 2023_avg_hu_size(1m)
## Min. :0.000 Min. :-49.33 Min. : 0.000
## 1st Qu.:2.200 1st Qu.: 4.97 1st Qu.: 2.000
## Median :2.400 Median : 12.30 Median : 3.000
## Mean :2.414 Mean : 17.45 Mean : 4.586
## 3rd Qu.:2.600 3rd Qu.: 22.99 3rd Qu.: 7.000
## Max. :4.100 Max. :188.33 Max. :19.000
## NA's :1 NA's :1 NA's :1
## 2023_avg_hu_value(1m) 2023_avg_yr_built(1m) 2023_group_quarters(1m)
## Min. : 0 Min. : 0 Min. : 0.0
## 1st Qu.:161680 1st Qu.:1978 1st Qu.: 1.0
## Median :206830 Median :1983 Median : 21.0
## Mean :227787 Mean :1980 Mean : 104.6
## 3rd Qu.:267029 3rd Qu.:1990 3rd Qu.: 112.0
## Max. :764965 Max. :2008 Max. :3287.0
## NA's :1 NA's :1 NA's :1
## 2023_home_blt_1940-1949(1m) 2023_home_blt_1950-1959(1m)
## Min. : 0.0 Min. : 0.0
## 1st Qu.: 21.0 1st Qu.: 31.0
## Median : 60.0 Median : 120.0
## Mean : 169.4 Mean : 345.6
## 3rd Qu.: 189.0 3rd Qu.: 414.0
## Max. :4433.0 Max. :3297.0
## NA's :1 NA's :1
## 2023_home_blt_1960-1969(1m) 2023_home_blt_1970-1979(1m)
## Min. : 0.0 Min. : 0.0
## 1st Qu.: 62.0 1st Qu.: 165.0
## Median : 234.0 Median : 452.0
## Mean : 489.5 Mean : 770.1
## 3rd Qu.: 701.0 3rd Qu.:1122.0
## Max. :4157.0 Max. :5872.0
## NA's :1 NA's :1
## 2023_home_blt_1980-1989(1m) 2023_home_blt_1990-1999(1m)
## Min. : 0.0 Min. : 0.0
## 1st Qu.: 228.0 1st Qu.: 150.0
## Median : 497.0 Median : 302.0
## Mean : 688.5 Mean : 391.3
## 3rd Qu.: 972.0 3rd Qu.: 509.0
## Max. :4214.0 Max. :3347.0
## NA's :1 NA's :1
## 2023_home_blt_2000-2010(1m) 2023_home_blt_2010+(1m) 2023_hu_1_unit(1m)
## Min. : 0 Min. : 0.0 Min. : 0
## 1st Qu.: 104 1st Qu.: 67.0 1st Qu.: 660
## Median : 259 Median : 199.0 Median :1514
## Mean : 391 Mean : 335.5 Mean :1732
## 3rd Qu.: 543 3rd Qu.: 428.0 3rd Qu.:2550
## Max. :3539 Max. :2570.0 Max. :8026
## NA's :1 NA's :1 NA's :1
## 2023_hu_20+_units(1m) 2023_hu_2-4_units(1m) 2023_hu_5-19_units(1m)
## Min. : 0 Min. : 0.0 Min. : 0.0
## 1st Qu.: 5 1st Qu.: 25.0 1st Qu.: 17.0
## Median : 75 Median : 147.0 Median : 165.0
## Mean : 387 Mean : 293.8 Mean : 420.4
## 3rd Qu.: 454 3rd Qu.: 411.0 3rd Qu.: 580.0
## Max. :7773 Max. :2743.0 Max. :4326.0
## NA's :1 NA's :1 NA's :1
## 2023_med_yr_built(1m) 2023_owner_occ'd_housing(1m)
## Min. : 0 Min. : 0
## 1st Qu.:1976 1st Qu.: 786
## Median :1982 Median :1574
## Mean :1979 Mean :1860
## 3rd Qu.:1989 3rd Qu.:2617
## Max. :2010 Max. :7443
## NA's :1 NA's :1
## 2023_renter_occ'd_housing(1m) 2023_home_value_$1,000,000+(1m)
## Min. : 0 Min. : 0.00
## 1st Qu.: 256 1st Qu.: 0.00
## Median : 740 Median : 0.00
## Mean :1174 Mean : 24.79
## 3rd Qu.:1777 3rd Qu.: 20.00
## Max. :8801 Max. :551.00
## NA's :1 NA's :1
## 2023_home_value_$100,000-200,000(1m) 2023_home_value_$200,000-300,000(1m)
## Min. : 0.0 Min. : 0
## 1st Qu.: 158.0 1st Qu.: 122
## Median : 372.0 Median : 302
## Mean : 531.8 Mean : 410
## 3rd Qu.: 757.0 3rd Qu.: 595
## Max. :3138.0 Max. :2179
## NA's :1 NA's :1
## 2023_home_value_$300,000-400,000(1m) 2023_home_value_$400,000-500,000(1m)
## Min. : 0.0 Min. : 0.00
## 1st Qu.: 35.0 1st Qu.: 5.00
## Median : 107.0 Median : 33.00
## Mean : 198.2 Mean : 75.17
## 3rd Qu.: 280.0 3rd Qu.: 95.00
## Max. :1649.0 Max. :1179.00
## NA's :1 NA's :1
## 2023_home_value_$500,000-1,000,000(1m) 2023_home_value_<100,000(1m)
## Min. : 0.0 Min. : 0.0
## 1st Qu.: 10.0 1st Qu.: 131.0
## Median : 47.0 Median : 314.0
## Mean : 100.5 Mean : 519.8
## 3rd Qu.: 133.0 3rd Qu.: 652.0
## Max. :1829.0 Max. :4566.0
## NA's :1 NA's :1
## 2023_median_home_value(1m) %_pop_grwth_2010-2023(1m) %_pop_grwth_2023-2028(1m)
## Min. : 0 Min. :-48.46 Min. :-8.960
## 1st Qu.:126721 1st Qu.: 5.43 1st Qu.: 0.000
## Median :171594 Median : 12.58 Median : 2.530
## Mean :188513 Mean : 17.37 Mean : 2.804
## 3rd Qu.:232528 3rd Qu.: 22.73 3rd Qu.: 5.400
## Max. :767973 Max. :189.24 Max. :17.130
## NA's :1 NA's :1 NA's :1
## 2023_population(1m) 2023_pop_wrk_trav_time_<30_min(1m)
## Min. : 0 Min. : 0
## 1st Qu.: 2746 1st Qu.: 608
## Median : 5854 Median :1548
## Mean : 7401 Mean :2058
## 3rd Qu.:10881 3rd Qu.:3129
## Max. :32505 Max. :8511
## NA's :1 NA's :1
## 2023_pop_wrk_trav_time_30-60_min(1m) 2023_pop_wrk_trav_time_60+_min(1m)
## Min. : 0.0 Min. : 0.0
## 1st Qu.: 274.0 1st Qu.: 64.0
## Median : 662.0 Median : 168.0
## Mean : 988.6 Mean : 245.9
## 3rd Qu.:1427.0 3rd Qu.: 317.0
## Max. :7965.0 Max. :2940.0
## NA's :1 NA's :1
## county_name city
## Length:1138 Length:1138
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
head(df)
## number_of_units year_built vacancy_% land_area_(sf) last_sale_date
## 1 25 1984 7.04 108900 2022-04-20
## 2 8 1950 10.26 277477 2022-05-13
## 3 18 1990 6.31 67965 2023-03-20
## 4 17 2007 4.61 174240 2019-08-02
## 5 13 1977 6.29 348480 2024-01-16
## 6 125 1936 6.04 337154 2021-07-15
## last_sale_price house_maint_&_repair_2023_cons_spdng_$(1m)
## 1 3100000 2315630
## 2 800000 178754
## 3 1000000 7470769
## 4 600000 180919
## 5 750000 26145
## 6 9500000 11054731
## household_operations_2023_cons_spdng_$(1m) %_hh_grwth_2010-2023(1m)
## 1 2875112 1.17
## 2 197441 46.67
## 3 8760263 2.66
## 4 158527 3.29
## 5 21370 0.00
## 6 11175881 13.91
## %_hh_grwth_2023-2028(1m) 2023_households(1m) 2023_med_hh_size(1m)
## 1 9.88 2571 2
## 2 7.91 139 3
## 3 -1.30 8107 2
## 4 -0.64 156 2
## 5 0.00 21 2
## 6 0.40 8322 2
## 2023_avg_hh_size(1m) %_hu_grwth_2010-2023(1m) 2023_avg_hu_size(1m)
## 1 2.3 8.03 4
## 2 3.0 51.09 2
## 3 2.0 5.03 9
## 4 2.1 10.64 1
## 5 2.2 -25.00 1
## 6 2.1 15.95 7
## 2023_avg_hu_value(1m) 2023_avg_yr_built(1m) 2023_group_quarters(1m)
## 1 215342 1971 491
## 2 225340 2003 0
## 3 240448 1974 130
## 4 157097 1984 0
## 5 394118 1984 0
## 6 222175 1972 62
## 2023_home_blt_1940-1949(1m) 2023_home_blt_1950-1959(1m)
## 1 849 381
## 2 1 2
## 3 763 1500
## 4 7 4
## 5 4 1
## 6 583 2772
## 2023_home_blt_1960-1969(1m) 2023_home_blt_1970-1979(1m)
## 1 277 247
## 2 2 8
## 3 1854 3048
## 4 7 83
## 5 3 5
## 6 1745 1907
## 2023_home_blt_1980-1989(1m) 2023_home_blt_1990-1999(1m)
## 1 310 284
## 2 8 12
## 3 1360 268
## 4 52 40
## 5 6 8
## 6 786 414
## 2023_home_blt_2000-2010(1m) 2023_home_blt_2010+(1m) 2023_hu_1_unit(1m)
## 1 372 145 1985
## 2 72 49 120
## 3 917 283 3791
## 4 21 8 77
## 5 10 0 16
## 6 56 1220 6281
## 2023_hu_20+_units(1m) 2023_hu_2-4_units(1m) 2023_hu_5-19_units(1m)
## 1 88 127 432
## 2 3 0 5
## 3 1353 926 1522
## 4 0 0 0
## 5 0 0 0
## 6 1658 458 546
## 2023_med_yr_built(1m) 2023_owner_occ'd_housing(1m)
## 1 1966 1343
## 2 2006 103
## 3 1972 4289
## 4 1981 124
## 5 1988 18
## 6 1967 6310
## 2023_renter_occ'd_housing(1m) 2023_home_value_$1,000,000+(1m)
## 1 1229 3
## 2 36 0
## 3 3818 106
## 4 32 0
## 5 3 4
## 6 2012 15
## 2023_home_value_$100,000-200,000(1m) 2023_home_value_$200,000-300,000(1m)
## 1 514 341
## 2 28 56
## 3 919 671
## 4 29 3
## 5 4 2
## 6 1958 1352
## 2023_home_value_$300,000-400,000(1m) 2023_home_value_$400,000-500,000(1m)
## 1 106 105
## 2 12 1
## 3 579 80
## 4 2 0
## 5 2 0
## 6 730 355
## 2023_home_value_$500,000-1,000,000(1m) 2023_home_value_<100,000(1m)
## 1 33 242
## 2 0 6
## 3 361 1573
## 4 12 78
## 5 0 5
## 6 324 1575
## 2023_median_home_value(1m) %_pop_grwth_2010-2023(1m)
## 1 183657 9.44
## 2 231250 51.61
## 3 162187 4.49
## 4 79486 10.56
## 5 187499 -26.09
## 6 180668 14.49
## %_pop_grwth_2023-2028(1m) 2023_population(1m)
## 1 10.34 6479
## 2 8.04 423
## 3 -1.39 16666
## 4 0.00 335
## 5 -1.96 51
## 6 0.17 17448
## 2023_pop_wrk_trav_time_<30_min(1m) 2023_pop_wrk_trav_time_30-60_min(1m)
## 1 1882 476
## 2 104 93
## 3 5237 2031
## 4 79 49
## 5 11 2
## 6 6442 1838
## 2023_pop_wrk_trav_time_60+_min(1m) county_name city
## 1 202 St. Johns Saint Augustine
## 2 21 Duval Jacksonville
## 3 377 Pinellas Largo
## 4 4 Okeechobee Okeechobee
## 5 2 Levy Otter Creek
## 6 234 Pinellas Saint Petersburg
# Keep only rows with no missing value in any column, then drop exact
# duplicate rows.
df <- df %>%
  na.omit() %>%
  unique()
# Force column names to lower case.
names(df) <- tolower(names(df))
# Linear model of sale price on land area, year built and unit count.
model1 <- lm(
  last_sale_price ~ `land_area_(sf)` + year_built + number_of_units,
  data = df
)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
str(df)
## 'data.frame': 1036 obs. of 49 variables:
## $ number_of_units : num 25 8 18 17 13 125 236 85 5 20 ...
## $ year_built : num 1984 1950 1990 2007 1977 ...
## $ vacancy_% : num 7.04 10.26 6.31 4.61 6.29 ...
## $ land_area_(sf) : num 108900 277477 67965 174240 348480 ...
## $ last_sale_date : POSIXct, format: "2022-04-20" "2022-05-13" ...
## $ last_sale_price : num 3100000 800000 1000000 600000 750000 ...
## $ house_maint_&_repair_2023_cons_spdng_$(1m): num 2315630 178754 7470769 180919 26145 ...
## $ household_operations_2023_cons_spdng_$(1m): num 2875112 197441 8760263 158527 21370 ...
## $ %_hh_grwth_2010-2023(1m) : num 1.17 46.67 2.66 3.29 0 ...
## $ %_hh_grwth_2023-2028(1m) : num 9.88 7.91 -1.3 -0.64 0 ...
## $ 2023_households(1m) : num 2571 139 8107 156 21 ...
## $ 2023_med_hh_size(1m) : num 2 3 2 2 2 2 2 2 2 3 ...
## $ 2023_avg_hh_size(1m) : num 2.3 3 2 2.1 2.2 2.1 2.1 1.8 2.5 3.4 ...
## $ %_hu_grwth_2010-2023(1m) : num 8.03 51.09 5.03 10.64 -25 ...
## $ 2023_avg_hu_size(1m) : num 4 2 9 1 1 7 9 17 3 12 ...
## $ 2023_avg_hu_value(1m) : num 215342 225340 240448 157097 394118 ...
## $ 2023_avg_yr_built(1m) : num 1971 2003 1974 1984 1984 ...
## $ 2023_group_quarters(1m) : num 491 0 130 0 0 62 209 21 75 520 ...
## $ 2023_home_blt_1940-1949(1m) : num 849 1 763 7 4 ...
## $ 2023_home_blt_1950-1959(1m) : num 381 2 1500 4 1 ...
## $ 2023_home_blt_1960-1969(1m) : num 277 2 1854 7 3 ...
## $ 2023_home_blt_1970-1979(1m) : num 247 8 3048 83 5 ...
## $ 2023_home_blt_1980-1989(1m) : num 310 8 1360 52 6 ...
## $ 2023_home_blt_1990-1999(1m) : num 284 12 268 40 8 ...
## $ 2023_home_blt_2000-2010(1m) : num 372 72 917 21 10 56 135 70 918 213 ...
## $ 2023_home_blt_2010+(1m) : num 145 49 283 8 0 ...
## $ 2023_hu_1_unit(1m) : num 1985 120 3791 77 16 ...
## $ 2023_hu_20+_units(1m) : num 88 3 1353 0 0 ...
## $ 2023_hu_2-4_units(1m) : num 127 0 926 0 0 458 435 252 33 953 ...
## $ 2023_hu_5-19_units(1m) : num 432 5 1522 0 0 ...
## $ 2023_med_yr_built(1m) : num 1966 2006 1972 1981 1988 ...
## $ 2023_owner_occ'd_housing(1m) : num 1343 103 4289 124 18 ...
## $ 2023_renter_occ'd_housing(1m) : num 1229 36 3818 32 3 ...
## $ 2023_home_value_$1,000,000+(1m) : num 3 0 106 0 4 15 117 28 8 2 ...
## $ 2023_home_value_$100,000-200,000(1m) : num 514 28 919 29 4 ...
## $ 2023_home_value_$200,000-300,000(1m) : num 341 56 671 3 2 ...
## $ 2023_home_value_$300,000-400,000(1m) : num 106 12 579 2 2 ...
## $ 2023_home_value_$400,000-500,000(1m) : num 105 1 80 0 0 355 159 296 21 463 ...
## $ 2023_home_value_$500,000-1,000,000(1m) : num 33 0 361 12 0 324 624 790 99 240 ...
## $ 2023_home_value_<100,000(1m) : num 242 6 1573 78 5 ...
## $ 2023_median_home_value(1m) : num 183657 231250 162187 79486 187499 ...
## $ %_pop_grwth_2010-2023(1m) : num 9.44 51.61 4.49 10.56 -26.09 ...
## $ %_pop_grwth_2023-2028(1m) : num 10.34 8.04 -1.39 0 -1.96 ...
## $ 2023_population(1m) : num 6479 423 16666 335 51 ...
## $ 2023_pop_wrk_trav_time_<30_min(1m) : num 1882 104 5237 79 11 ...
## $ 2023_pop_wrk_trav_time_30-60_min(1m) : num 476 93 2031 49 2 ...
## $ 2023_pop_wrk_trav_time_60+_min(1m) : num 202 21 377 4 2 ...
## $ county_name : chr "St. Johns" "Duval" "Pinellas" "Okeechobee" ...
## $ city : chr "Saint Augustine" "Jacksonville" "Largo" "Okeechobee" ...
## - attr(*, "na.action")= 'omit' Named int [1:102] 8 36 44 48 79 109 114 120 129 130 ...
## ..- attr(*, "names")= chr [1:102] "8" "36" "44" "48" ...
vif(model1)
## `land_area_(sf)` year_built number_of_units
## 1.778941 1.038248 1.754210
library(ggplot2)
# Scatter plot of year built against last sale price, with a fitted linear
# trend line. The x axis runs from 0 to the largest sale price and the y axis
# covers the observed range of year_built.
#df <- read_excel("~/Raw Data NEW 02162024.xlsx")
ggplot(df, aes(x = last_sale_price, y = year_built)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between Last Sale Price and Year Built",
    x = "Last Sale Price",
    y = "Year Built"
  ) +
  scale_x_continuous(limits = c(0, max(df$last_sale_price))) +
  scale_y_continuous(limits = c(min(df$year_built), max(df$year_built)))
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_smooth()`).
# Same plot as above, but with the sale price divided by the number of units.
ggplot(df, aes(x = last_sale_price / number_of_units, y = year_built)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between Sale Price per Unit and Year Built",
    x = "Sale Price per Unit",
    y = "Year Built"
  ) +
  scale_x_continuous(
    limits = c(0, max(df$last_sale_price / df$number_of_units))
  ) +
  scale_y_continuous(limits = c(min(df$year_built), max(df$year_built)))
## `geom_smooth()` using formula = 'y ~ x'
# Restrict to properties whose sale price per unit is at most 100,000, then
# redraw the price-per-unit vs. year-built scatter plot on that subset.
df_filtered <- filter(df, last_sale_price / number_of_units <= 100000)
ggplot(df_filtered, aes(x = last_sale_price / number_of_units, y = year_built)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between Sale Price per Unit and Year Built",
    x = "Sale Price per Unit",
    y = "Year Built"
  ) +
  scale_x_continuous(limits = c(0, 100000)) +
  scale_y_continuous(
    limits = c(min(df_filtered$year_built), max(df_filtered$year_built))
  )
## `geom_smooth()` using formula = 'y ~ x'
# Restrict to properties whose sale price per unit is at most 40,000 and plot
# price per unit against the number of units, with a linear trend line.
df_filtered <- filter(df, last_sale_price / number_of_units <= 40000)
ggplot(df_filtered, aes(x = number_of_units, y = last_sale_price / number_of_units)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between Number of Units and Sale Price per Unit",
    x = "Number of Units",
    y = "Sale Price per Unit"
  )
## `geom_smooth()` using formula = 'y ~ x'
# Price per unit against land area for the full cleaned data set, with a
# linear trend line. The column name needs backticks because it contains
# parentheses.
ggplot(df, aes(x = `land_area_(sf)`, y = last_sale_price / number_of_units)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship between Land Area and Sale Price per Unit",
    x = "Land Area",
    y = "Sale Price per Unit"
  )
## `geom_smooth()` using formula = 'y ~ x'
#library(glmnet)
# Prepare the data
#X <- as.matrix(df[, -which(names(df) == "last_sale_price")]) # Independent variables
#y <- df$last_sale_price / df$number_of_units # Dependent variable
# Fit Lasso regression model
#lasso_model <- cv.glmnet(X, y, alpha = 1, nfolds = 10)
# Get the selected variables
#lasso_selected_variables <- coef(lasso_model, s = "lambda.min")[-1, ]
#lasso_selected_variables <- names(lasso_selected_variables[lasso_selected_variables != 0])
# Print the selected variables
#print(lasso_selected_variables)
# Convert the dataframe to a matrix and ensure all elements are numeric
#X <- as.matrix(df[, -which(names(df) == "last_sale_price")],
#rownames.force = NA)
#X <- apply(X, 2, as.numeric)
# Scale the independent variables
#scaled_X <- scale(X)
# Impute missing values with the mean
#scaled_X[is.na(scaled_X)] <- colMeans(scaled_X, na.rm = TRUE)
#library(missForest)
#imputed_data <- missForest(scaled_X)
# Fit LASSO regression model on scaled data
#lasso_model <- cv.glmnet(imputed_data$ximp, y, alpha = 1, nfolds = 10)
# Get the selected variables
#lasso_selected_variables <- coef(lasso_model, s = "lambda.min")[-1, ]
#lasso_selected_variables <- names(lasso_selected_variables[lasso_selected_variables != 0])
# Print the selected variables
#print(lasso_selected_variables)
#If the Lasso regression model returns an empty set of selected variables ("character (0)"), it suggests that none of the variables were selected as important predictors by the Lasso model. This can happen if the regularization penalty is too strong or if none of the variables provide significant predictive power for the target variable.
#That means we need to select based on intuition or some other method
# Convert non-numeric columns to numeric
#numeric_df <- as.data.frame(sapply(df, function(x) as.numeric(as.character(x))))
# Calculate correlations
#correlations <- cor(numeric_df)
# Sort correlations with the target variable ('last sale price per unit')
#target_correlations <- correlations[, "last_sale_price_per_unit"]
# Print the top 10 positive and negative correlations with the target variable
#print("Top 10 Positive Correlations:")
#print(head(sort(target_correlations[target_correlations > 0], decreasing = TRUE), 10))
#print("Top 10 Negative Correlations:")
#print(head(sort(target_correlations[target_correlations < 0], decreasing = FALSE), 10))
# Convert non-numeric columns to numeric
#numeric_df <- as.data.frame(sapply(df, function(x) as.numeric(as.character(x))))
# Load required libraries
#library(ggplot2)
# Provide context
#cat("This analysis aims to investigate the relationship between various factors and the 'last sale price per unit' in a real estate dataset.\n")
# Step 1: Data preprocessing
# Load and preprocess the dataset
# Step 2: Calculate correlations
# Convert non-numeric columns to numeric
#numeric_df <- as.data.frame(sapply(df, function(x) as.numeric(as.character(x))))
# Exclude 'last sale price' variable
#numeric_df_without_last_sale_price <- numeric_df[, !names(numeric_df) %in% "last_sale_price"]
# Calculate 'last sale price per unit'
#numeric_df_without_last_sale_price$last_sale_price_per_unit <- numeric_df$last_sale_price / numeric_df$number_of_units
# Calculate correlations
#correlations <- cor(numeric_df_without_last_sale_price)
# Sort correlations with the target variable ('last sale price per unit')
#target_correlations <- correlations[, "last_sale_price_per_unit"]
#library(ggplot2)
# Step 3: Visualize results
# Plot top positive and negative correlations
# Top positive correlations
#positive_correlations <- sort(target_correlations[target_correlations > 0], decreasing = TRUE)
#top_positive_vars <- names(positive_correlations)[1:3] # Select top 3 positive correlated variables
#positive_plots <- lapply(1:length(top_positive_vars), function(i) {
# ggplot(numeric_df_without_last_sale_price, aes_string(x = top_positive_vars[i], y = "last_sale_price_per_unit")) +
# geom_point() +
# geom_smooth(method = "lm", se = FALSE, color = "blue") +
#scale_x_continuous(limits = quantile(numeric_df_without_last_sale_price[[top_positive_vars[i]]], c(0.1, 0.9))) +
# scale_y_continuous(limits = quantile(numeric_df_without_last_sale_price$last_sale_price_per_unit, c(0.1, 0.9))) +
# labs(x = top_positive_vars[i], y = "Last Sale Price Per Unit") +
# ggtitle(paste(i, ". Relationship between", top_positive_vars[i], "and Last Sale Price Per Unit"))
#})
# Top negative correlations
#negative_correlations <- sort(target_correlations[target_correlations < 0], decreasing = FALSE)
#top_negative_vars <- names(negative_correlations)[1:3] # Select top 3 negative correlated variables
#negative_plots <- lapply(1:length(top_negative_vars), function(i) {
# ggplot(numeric_df_without_last_sale_price, aes_string(x = top_negative_vars[i], y = "last_sale_price_per_unit")) +
# geom_point() +
# geom_smooth(method = "lm", se = FALSE, color = "blue") +
# scale_x_continuous(limits = quantile(numeric_df_without_last_sale_price[[top_negative_vars[i]]], c(0.1, 0.9))) +
# scale_y_continuous(limits = quantile(numeric_df_without_last_sale_price$last_sale_price_per_unit, c(0.1, 0.9))) +
# labs(x = top_negative_vars[i], y = "Last Sale Price Per Unit") +
# ggtitle(paste(i, ". Relationship between", top_negative_vars[i], "and Last Sale Price Per Unit"))
#})
# Print plots
#print("Top Positive Correlations:")
#print(positive_plots)
#print("Top Negative Correlations:")
#print(negative_plots)
# Step 4: Interpretation
# Summarize key findings
#cat("\nKey Findings:\n")
#cat("The analysis revealed several factors positively correlated with 'last sale price per unit', including vacancy rates and home values in certain price ranges.\n")
#cat("On the other hand, factors such as average household size and number of units showed negative correlations with 'last sale price per unit'.\n")
# Discuss implications
#cat("\nImplications:\n")
#cat("The findings suggest that vacancy rates and home values play a significant role in determining sale prices per unit in the real estate market.\n")
#cat("Understanding these relationships can help stakeholders make informed decisions when buying or selling properties.\n")
library(ggplot2)
# Fit LASSO and ridge regressions with glmnet ----

# Install glmnet only when it is missing, so re-running or knitting the
# document does not try to re-download the package every time.
if (!requireNamespace("glmnet", quietly = TRUE)) {
  install.packages("glmnet")
}
library(glmnet)

# Fix the RNG seed so the simulated values below are reproducible.
set.seed(123)

# NOTE(review): these two assignments overwrite the `Land Area (SF)` and
# `Last Sale Price` columns of df with random normal draws (a 1036 x 20
# predictor matrix and a length-1036 response). Confirm that replacing the
# real columns is intended and that df has exactly 1036 rows at this point;
# otherwise the assignments fail with a row-count mismatch.
df$`Land Area (SF)` <- matrix(rnorm(1036 * 20), 1036, 20)
df$`Last Sale Price` <- rnorm(1036)

# alpha = 1 selects the LASSO penalty; alpha = 0 selects the ridge penalty.
lasso_model <- glmnet(df$`Land Area (SF)`, df$`Last Sale Price`, alpha = 1)
ridge_model <- glmnet(df$`Land Area (SF)`, df$`Last Sale Price`, alpha = 0)

# Print the fitted regularization paths.
print(lasso_model)
print(ridge_model)

# Plot the coefficient paths for each model.
plot(lasso_model)
plot(ridge_model)
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.