# Location of the raw Zillow home-price CSV hosted on GitHub.
urldat <- "https://raw.githubusercontent.com/kglan/MSDS/main/DATA607/Data%20Transformation/ZillowHomePrices/ZillowHomeprices.csv"

# Open the URL as a connection and let readr parse it into a tibble.
zill <- read_csv(file = url(urldat))
## Rows: 27319 Columns: 281
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (7): RegionName, RegionType, StateName, State, City, Metro, CountyName
## dbl (274): RegionID, SizeRank, 2000-01-31, 2000-02-29, 2000-03-31, 2000-04-3...
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Auto-print the tibble to inspect its dimensions and leading columns.
zill
## # A tibble: 27,319 x 281
## RegionID SizeRank RegionN~1 Regio~2 State~3 State City Metro Count~4 2000-~5
## <dbl> <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 91940 0 77449 zip TX TX <NA> Hous~ Harris~ 107022
## 2 91982 1 77494 zip TX TX <NA> Hous~ Fort B~ 223494
## 3 93144 2 79936 zip TX TX El P~ El P~ El Pas~ 90944
## 4 62080 3 11368 zip NY NY New ~ New ~ Queens~ 321785
## 5 62093 4 11385 zip NY NY New ~ New ~ Queens~ 284873
## 6 95992 5 90011 zip CA CA Los ~ Los ~ Los An~ 141118
## 7 84630 6 60629 zip IL IL Chic~ Chic~ Cook C~ 134265
## 8 91733 7 77084 zip TX TX Hous~ Hous~ Harris~ 106747
## 9 96361 8 91331 zip CA CA Los ~ Los ~ Los An~ 145449
## 10 96193 9 90650 zip CA CA Norw~ Los ~ Los An~ 165978
## # ... with 27,309 more rows, 271 more variables: `2000-02-29` <dbl>,
## # `2000-03-31` <dbl>, `2000-04-30` <dbl>, `2000-05-31` <dbl>,
## # `2000-06-30` <dbl>, `2000-07-31` <dbl>, `2000-08-31` <dbl>,
## # `2000-09-30` <dbl>, `2000-10-31` <dbl>, `2000-11-30` <dbl>,
## # `2000-12-31` <dbl>, `2001-01-31` <dbl>, `2001-02-28` <dbl>,
## # `2001-03-31` <dbl>, `2001-04-30` <dbl>, `2001-05-31` <dbl>,
## # `2001-06-30` <dbl>, `2001-07-31` <dbl>, `2001-08-31` <dbl>, ...
(A problem arose where some of the data had NA values; rows without accurate and consistent reporting data were removed.)
# Keep only New York rows, drop every row that has an NA in any column,
# then retain columns 250 through 281 by position (the monthly columns
# 2020-01-31 through 2022-08-31, per the printed result below).
zill <- zill %>%
  filter(State == "NY") %>%
  na.omit() %>%
  select(250:281)
# Average each remaining monthly column across the NY rows. Note that this is
# the column mean, not the column sum.
price <- colMeans(zill)

# Build a long view: one row per month holding the mean price and its date.
# colMeans() returns a vector named by column, so data.frame() turns those
# names into row names; copy them into a Date column (kept as character).
zillowNY <- data.frame(price)
zillowNY$Date <- row.names(zillowNY)

# Replace the date row names with a plain 1..n index. seq_len() is used
# instead of 1:nrow() so the sequence stays well-formed for any row count.
rownames(zillowNY) <- seq_len(nrow(zillowNY))
zillowNY
## price Date
## 1 394792.4 2020-01-31
## 2 396162.7 2020-02-29
## 3 397144.8 2020-03-31
## 4 397722.8 2020-04-30
## 5 398142.1 2020-05-31
## 6 398809.3 2020-06-30
## 7 400178.0 2020-07-31
## 8 402207.7 2020-08-31
## 9 405459.3 2020-09-30
## 10 409590.3 2020-10-31
## 11 414131.1 2020-11-30
## 12 418405.1 2020-12-31
## 13 421969.1 2021-01-31
## 14 426173.2 2021-02-28
## 15 430239.8 2021-03-31
## 16 434580.6 2021-04-30
## 17 439164.4 2021-05-31
## 18 444636.3 2021-06-30
## 19 451466.2 2021-07-31
## 20 457735.4 2021-08-31
## 21 462835.2 2021-09-30
## 22 466295.8 2021-10-31
## 23 469186.5 2021-11-30
## 24 472144.4 2021-12-31
## 25 475676.1 2022-01-31
## 26 479984.3 2022-02-28
## 27 485810.7 2022-03-31
## 28 491853.4 2022-04-30
## 29 498175.9 2022-05-31
## 30 502947.3 2022-06-30
## 31 506329.0 2022-07-31
## 32 508119.0 2022-08-31
# Plot the mean NY home price for each month as points joined by a line.
# Date is a character column, so ggplot treats the x axis as discrete and
# places every observation in its own group; geom_line() then has nothing
# to connect and draws no line. Setting group = 1 puts all points in a
# single group so the line is drawn across the months.
ggplot(zillowNY, aes(x = Date, y = price, group = 1)) +
  geom_point() +
  geom_line() +
  # Rotate the date labels so all 32 remain readable.
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?