Create CSV and read data

library(readr)
library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

property_sales <- read.csv("/Users/blessinga/Desktop/nyc-property-sales.csv", header=TRUE, sep=",")

str(property_sales)
## 'data.frame':    1048575 obs. of  21 variables:
##  $ BOROUGH                       : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ NEIGHBORHOOD                  : chr  "ALPHABET CITY" "ALPHABET CITY" "ALPHABET CITY" "ALPHABET CITY" ...
##  $ BUILDING.CLASS.CATEGORY       : chr  "01 ONE FAMILY DWELLINGS" "02 TWO FAMILY DWELLINGS" "07 RENTALS - WALKUP APARTMENTS" "07 RENTALS - WALKUP APARTMENTS" ...
##  $ TAX.CLASS.AT.PRESENT          : chr  "1" "1" "2B" "2B" ...
##  $ BLOCK                         : int  374 377 373 373 376 377 377 379 389 389 ...
##  $ LOT                           : int  46 1 16 17 54 52 52 25 45 47 ...
##  $ EASE.MENT                     : chr  "" "" "" "" ...
##  $ BUILDING.CLASS.AT.PRESENT     : chr  "A4" "S2" "C1" "C1" ...
##  $ ADDRESS                       : chr  "347 EAST 4TH STREET" "110 AVENUE C" "326 EAST 4TH STREET" "328 EAST 4TH STREET" ...
##  $ APARTMENT.NUMBER              : chr  "" "" "" "" ...
##  $ ZIP.CODE                      : int  10009 10009 10009 10009 10009 10009 10009 10009 10009 10009 ...
##  $ RESIDENTIAL.UNITS             : int  1 2 10 10 20 5 5 7 10 10 ...
##  $ COMMERCIAL.UNITS              : int  0 1 0 0 0 0 0 1 0 0 ...
##  $ TOTAL.UNITS                   : int  1 3 10 10 20 5 5 8 10 10 ...
##  $ LAND.SQUARE.FEET              : int  2116 1502 2204 2204 2302 2168 2169 4337 2271 2248 ...
##  $ GROSS.SQUARE.FEET             : int  4400 2790 8625 8625 9750 3728 3728 14347 6985 6985 ...
##  $ YEAR.BUILT                    : int  1900 1901 1899 1900 1900 1900 1900 1920 1900 1910 ...
##  $ TAX.CLASS.AT.TIME.OF.SALE     : int  1 1 2 2 2 2 2 2 2 2 ...
##  $ BUILDING.CLASS.AT.TIME.OF.SALE: chr  "A4" "S2" "C1" "C1" ...
##  $ SALE.PRICE                    : num  399000 2999999 16800000 16800000 158822 ...
##  $ SALE.DATE                     : chr  "Sep-22" "Sep-22" "Aug-22" "Aug-22" ...
summary(property_sales)
##     BOROUGH     NEIGHBORHOOD       BUILDING.CLASS.CATEGORY TAX.CLASS.AT.PRESENT
##  Min.   :1.00   Length:1048575     Length:1048575          Length:1048575      
##  1st Qu.:1.00   Class :character   Class :character        Class :character    
##  Median :3.00   Mode  :character   Mode  :character        Mode  :character    
##  Mean   :2.89                                                                  
##  3rd Qu.:4.00                                                                  
##  Max.   :5.00                                                                  
##                                                                                
##      BLOCK            LOT          EASE.MENT         BUILDING.CLASS.AT.PRESENT
##  Min.   :    1   Min.   :   1.0   Length:1048575     Length:1048575           
##  1st Qu.: 1217   1st Qu.:  22.0   Class :character   Class :character         
##  Median : 2997   Median :  51.0   Mode  :character   Mode  :character         
##  Mean   : 4060   Mean   : 391.6                                               
##  3rd Qu.: 6019   3rd Qu.:1003.0                                               
##  Max.   :16350   Max.   :9139.0                                               
##                                                                               
##    ADDRESS          APARTMENT.NUMBER      ZIP.CODE     RESIDENTIAL.UNITS 
##  Length:1048575     Length:1048575     Min.   :    0   Min.   :    0.00  
##  Class :character   Class :character   1st Qu.:10128   1st Qu.:    0.00  
##  Mode  :character   Mode  :character   Median :11207   Median :    1.00  
##                                        Mean   :10791   Mean   :    2.33  
##                                        3rd Qu.:11355   3rd Qu.:    2.00  
##                                        Max.   :11697   Max.   :13480.00  
##                                        NA's   :35      NA's   :76752     
##  COMMERCIAL.UNITS   TOTAL.UNITS       LAND.SQUARE.FEET   GROSS.SQUARE.FEET
##  Min.   :   0.00   Min.   :    0.00   Min.   :       0   Min.   :      0  
##  1st Qu.:   0.00   1st Qu.:    1.00   1st Qu.:       0   1st Qu.:      0  
##  Median :   0.00   Median :    1.00   Median :    1995   Median :   1313  
##  Mean   :   0.25   Mean   :    2.59   Mean   :    3669   Mean   :   4194  
##  3rd Qu.:   0.00   3rd Qu.:    2.00   3rd Qu.:    3000   3rd Qu.:   2352  
##  Max.   :2261.00   Max.   :13495.00   Max.   :29305534   Max.   :8942176  
##  NA's   :113608    NA's   :70732      NA's   :119629     NA's   :119628   
##    YEAR.BUILT    TAX.CLASS.AT.TIME.OF.SALE BUILDING.CLASS.AT.TIME.OF.SALE
##  Min.   :   0    Min.   :1.000             Length:1048575                
##  1st Qu.:1920    1st Qu.:1.000             Class :character              
##  Median :1940    Median :2.000             Mode  :character              
##  Mean   :1810    Mean   :1.719                                           
##  3rd Qu.:1967    3rd Qu.:2.000                                           
##  Max.   :2022    Max.   :4.000                                           
##  NA's   :24023                                                           
##    SALE.PRICE         SALE.DATE        
##  Min.   :0.000e+00   Length:1048575    
##  1st Qu.:0.000e+00   Class :character  
##  Median :3.853e+05   Mode  :character  
##  Mean   :1.183e+06                     
##  3rd Qu.:8.100e+05                     
##  Max.   :4.111e+09                     
## 

Remove columns

## Remove columns
nyc_sales <- property_sales %>%
  select(-c(BUILDING.CLASS.CATEGORY, TAX.CLASS.AT.PRESENT, BLOCK,LOT,EASE.MENT, BUILDING.CLASS.AT.PRESENT,ADDRESS, APARTMENT.NUMBER,ZIP.CODE,RESIDENTIAL.UNITS, COMMERCIAL.UNITS,TOTAL.UNITS,LAND.SQUARE.FEET,GROSS.SQUARE.FEET,TAX.CLASS.AT.TIME.OF.SALE,BUILDING.CLASS.AT.TIME.OF.SALE))

nyc_sales <- nyc_sales %>%
  na.omit()  

head(nyc_sales)
##   BOROUGH  NEIGHBORHOOD YEAR.BUILT SALE.PRICE SALE.DATE
## 1       1 ALPHABET CITY       1900     399000    Sep-22
## 2       1 ALPHABET CITY       1901    2999999    Sep-22
## 3       1 ALPHABET CITY       1899   16800000    Aug-22
## 4       1 ALPHABET CITY       1900   16800000    Aug-22
## 5       1 ALPHABET CITY       1900     158822    Sep-22
## 6       1 ALPHABET CITY       1900          0    Aug-22

Remame Columns and Analyze

Analysis Part 1

nyc_sales2 <-rename(nyc_sales, Borough = BOROUGH, Neighborhood = NEIGHBORHOOD, SalePrice = SALE.PRICE, SaleDate=SALE.DATE, YearBuilt = YEAR.BUILT) 


sales_summary <- nyc_sales2 %>%
  group_by(Borough) %>%
  summarise(mean_sale_price = mean(SalePrice),
            median_sale_price = median(SalePrice),
            min_sale_price = min(SalePrice),
            max_sale_price = max(SalePrice))

print(sales_summary)
## # A tibble: 5 × 5
##   Borough mean_sale_price median_sale_price min_sale_price max_sale_price
##     <int>           <dbl>             <dbl>          <dbl>          <dbl>
## 1       1        2641497.            695000              0     4111111766
## 2       2         695843.            250000              0      137298279
## 3       3         796102.            370000              0      869612895
## 4       4         601479.            295000              0      481093581
## 5       5         381611.            349900              0      122000000

Analysis Part 2

## line plot by borough it was meant to have colors by borough
library(ggplot2)
 ggplot(nyc_sales2, aes(x = SaleDate, y = SalePrice, color = Borough)) +
  geom_line() +
  labs(title = "Sale Price Trends Over Time",
       x = "Sale Date",
       y = "Sale Price") +
  theme_minimal()

Analysis Conclusion

Final Analysis based on Analysis Part 1 :

Borough 1 and 3 have the highest mean, median and max sales prices but most especially borough 1. Borough 1 most likely might be a borough where things are high in cost, from rent, food, groceries, and much more. It most likely might also be a borough near tourists areas and where many visitors tend to go to. Example Manhattan. Borough 1 properties have a higher value compared to the other boroughs.