Group members: Amritha, Sumedh, Bhushan, Jason

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggthemes)
library(ggrepel)
library(broom)
library(lindia)
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
# Location of the apartments data set (a comma-delimited text file).
url <- "https://raw.githubusercontent.com/leontoddjohnson/i590/main/data/apartments/apartments.csv"

# Download the file and parse it into the `apts` tibble.
apts <- read_delim(file = url, delim = ",")
## Rows: 492 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (8): in_sf, beds, bath, price, year_built, sqft, price_per_sqft, elevation
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Inspect the loaded data (opens the data viewer in an interactive session).
view(apts)
# Simple linear regression of price on square footage, fitted only on the
# rows where in_sf == 0.
model <- lm(formula = price ~ sqft, data = filter(apts, in_sf == 0))

# R-squared of the linear fit; shown in the plot subtitle below.
rsquared <- summary(model)[["r.squared"]]

# Scatter plot of the same subset with two trend lines:
#   * dashed gray line  - the least-squares (lm) fit
#   * solid default line - geom_smooth()'s default smoother, drawn for
#     comparison so departures from linearity are visible
ggplot(
  data = filter(apts, in_sf == 0),
  mapping = aes(x = sqft, y = price)
) +
  geom_point() +
  geom_smooth(
    method = "lm",
    se = FALSE,
    color = "gray",
    linetype = "dashed"
  ) +
  geom_smooth(se = FALSE) +
  labs(
    title = "Price vs. sqft",
    subtitle = paste("Linear Fit R-Squared =", round(rsquared, 3))
  ) +
  theme_classic()
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Simple linear regression of price per square foot on number of bedrooms,
# fitted only on the rows where in_sf == 0.
model <- lm(formula = price_per_sqft ~ beds, data = filter(apts, in_sf == 0))

# R-squared of the linear fit; shown in the plot subtitle below.
rsquared <- summary(model)[["r.squared"]]

# Scatter plot of the same subset with two trend lines:
#   * dashed gray line - the least-squares (lm) fit
#   * solid blue line  - the mean price_per_sqft at each bedroom count
#
# `beds` only takes a handful of distinct values, so a loess smoother is
# numerically ill-conditioned here (it produced "pseudoinverse used" and
# "near singularities" warnings). Connecting the per-`beds` means gives the
# same kind of non-parametric reference curve without that problem.
ggplot(
  data = filter(apts, in_sf == 0),
  mapping = aes(x = beds, y = price_per_sqft)
) +
  geom_point() +
  geom_smooth(
    method = "lm",
    se = FALSE,
    color = "gray",
    linetype = "dashed"
  ) +
  stat_summary(
    fun = mean,
    geom = "line",
    color = "#3366FF",
    linewidth = 1
  ) +
  labs(
    title = "price_per_sqft vs. beds",
    subtitle = paste("Linear Fit R-Squared =", round(rsquared, 3))
  ) +
  theme_classic()
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : pseudoinverse used at 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : reciprocal condition number 1.6637e-16
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : There are other near singularities as well. 1

# Simple linear regression of price per square foot on elevation, fitted only
# on the rows where in_sf == 0.
model <- lm(
  formula = price_per_sqft ~ elevation,
  data = filter(apts, in_sf == 0)
)

# R-squared of the linear fit; shown in the plot subtitle below.
rsquared <- summary(model)[["r.squared"]]

# Scatter plot of the same subset with two trend lines:
#   * dashed gray line  - the least-squares (lm) fit
#   * solid default line - geom_smooth()'s default smoother, drawn for
#     comparison so departures from linearity are visible
ggplot(
  data = filter(apts, in_sf == 0),
  mapping = aes(x = elevation, y = price_per_sqft)
) +
  geom_point() +
  geom_smooth(
    method = "lm",
    se = FALSE,
    color = "gray",
    linetype = "dashed"
  ) +
  geom_smooth(se = FALSE) +
  labs(
    title = "price_per_sqft vs. elevation",
    subtitle = paste("Linear Fit R-Squared =", round(rsquared, 3))
  ) +
  theme_classic()
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Simple linear regression of price on elevation, fitted only on the rows
# where in_sf == 0.
model <- lm(formula = price ~ elevation, data = filter(apts, in_sf == 0))

# R-squared of the linear fit; shown in the plot subtitle below.
rsquared <- summary(model)[["r.squared"]]

# Scatter plot of the same subset with two trend lines:
#   * dashed gray line  - the least-squares (lm) fit
#   * solid default line - geom_smooth()'s default smoother, drawn for
#     comparison so departures from linearity are visible
ggplot(
  data = filter(apts, in_sf == 0),
  mapping = aes(x = elevation, y = price)
) +
  geom_point() +
  geom_smooth(
    method = "lm",
    se = FALSE,
    color = "gray",
    linetype = "dashed"
  ) +
  geom_smooth(se = FALSE) +
  labs(
    title = "price vs. elevation",
    subtitle = paste("Linear Fit R-Squared =", round(rsquared, 3))
  ) +
  theme_classic()
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Simple linear regression of price on construction year, fitted only on the
# rows where in_sf == 0.
model <- lm(formula = price ~ year_built, data = filter(apts, in_sf == 0))

# R-squared of the linear fit; shown in the plot subtitle below.
rsquared <- summary(model)[["r.squared"]]

# Scatter plot of the same subset with two trend lines:
#   * dashed gray line  - the least-squares (lm) fit
#   * solid default line - geom_smooth()'s default smoother, drawn for
#     comparison so departures from linearity are visible
ggplot(
  data = filter(apts, in_sf == 0),
  mapping = aes(x = year_built, y = price)
) +
  geom_point() +
  geom_smooth(
    method = "lm",
    se = FALSE,
    color = "gray",
    linetype = "dashed"
  ) +
  geom_smooth(se = FALSE) +
  labs(
    title = "price vs. year_built",
    subtitle = paste("Linear Fit R-Squared =", round(rsquared, 3))
  ) +
  theme_classic()
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Consider the following question: what could happen if the models above were deployed on a platform like Zillow? The data set is not well described by a linear model (as seen in the plots above). If we put these models into the real world, they would help neither the company nor buyers make decisions about how prices fluctuate based on various factors.