library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggthemes)
library(ggrepel)
library(broom)
library(lindia)
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some

Model Critique:

For this lab, you’ll be working with a group of other classmates, and each group will be assigned a lab from a previous week. Your goal is to critique the models (or analyses) present in the lab.

Group:

Amritha Prakash, Sumedh Sonawane, Jason Moore, Bhushan Shelke

Description:

First, review the materials from the Lesson on Ethics and Epistemology (week 5?). This includes lecture slides, the lecture video, or the reading. You can use these as reference materials for this lab. You may even consider the reading for the week associated with the lab, or even supplementary research on the topic at hand (e.g., news outlets, historical articles, etc.).

For the lab your group has been assigned, consider issues with models, interpretations, analyses, visualizations, etc. Use this notebook as a sandbox for trying out different code, and investigating the data from a different perspective. Take notes on all the issues you see, and possible solutions (even if you would need to request more data or resources to accomplish those solutions).

Share your model critique in this notebook as your data dive submission for the week.

As a start, think about the context of the lab and consider the following:

Treat this exercise as if the analyses in your assigned lab (i.e., the one you are critiquing) were to be published, made available to the public in a press release, or used at some large company (e.g., for mpg data, imagine if Toyota used the conclusions to drive strategic decisions).

Critique -

We critiqued the Week 10 lab, Generalized Linear Models (Part 1). The lecture was about plotting various features and identifying whether they are suitable for a linear model and, if not, how to transform the explanatory variable or even the response variable. It also discussed logistic regression and Poisson regression.

We looked into the first part, i.e., the plotting of linear models and the transformations, and we have some questions and suggestions about it.

# Location of the apartments dataset (a CSV file hosted on GitHub).
url <- "https://raw.githubusercontent.com/leontoddjohnson/i590/main/data/apartments/apartments.csv"

# Read the comma-delimited file; readr guesses the column types.
apts <- url |>
  read_delim(delim = ",")
## Rows: 492 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (8): in_sf, beds, bath, price, year_built, sqft, price_per_sqft, elevation
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Open the data in the viewer for a quick manual look.
view(apts)

# Simple linear regression of price on square footage, fitted only to the
# rows where in_sf == 0.
model <- lm(price ~ sqft, data = filter(apts, in_sf == 0))

# R-squared of the linear fit; shown in the plot subtitle.
rsquared <- summary(model)[["r.squared"]]

# Scatter plot of the same subset with two reference curves:
#   * gray dashed line - the straight-line (lm) fit, i.e. the model above
#   * default smoother - a flexible loess curve that follows the data
# Where the two curves diverge, the linear model is missing structure.
apts |>
  filter(in_sf == 0) |>
  ggplot(aes(x = sqft, y = price)) +
  geom_point() +
  geom_smooth(
    method = "lm",
    se = FALSE,
    color = "gray",
    linetype = "dashed"
  ) +
  geom_smooth(se = FALSE) +
  theme_classic() +
  labs(
    title = "Price vs. sqft",
    subtitle = paste("Linear Fit R-Squared =", round(rsquared, 3))
  )
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

The following are some linear-model checks using various variables.
1. Price per sqft vs Beds in New York

# Linear regression of price per square foot on bedroom count, fitted only to
# the rows where in_sf == 0.
model <- lm(price_per_sqft ~ beds, data = filter(apts, in_sf == 0))

# R-squared of the linear fit; shown in the plot subtitle.
rsquared <- summary(model)$r.squared

# `beds` is a bedroom count with many tied x values, so the default loess
# smoother is ill-conditioned on it (simpleLoess warns about a pseudoinverse
# and near singularities). The flexible reference curve is therefore drawn as
# the mean price_per_sqft at each bed count, which needs no local regression.
apts |>
  filter(in_sf == 0) |>
  ggplot(mapping = aes(x = beds, y = price_per_sqft)) +
  geom_point() +
  # Gray dashed line: the straight-line (lm) fit being checked.
  geom_smooth(method = "lm", color = "gray", linetype = "dashed", se = FALSE) +
  # Blue line: observed mean at each bed count, for comparison with the fit.
  stat_summary(fun = mean, geom = "line", color = "#3366FF", linewidth = 1) +
  labs(
    title = "price_per_sqft vs. beds",
    subtitle = paste("Linear Fit R-Squared =", round(rsquared, 3))
  ) +
  theme_classic()
## `geom_smooth()` using formula = 'y ~ x'

 

  2. Price per sqft vs Elevation in New York
# Linear regression of price per square foot on elevation, fitted only to the
# rows where in_sf == 0.
model <- lm(price_per_sqft ~ elevation, data = filter(apts, in_sf == 0))

# R-squared of the linear fit; shown in the plot subtitle.
rsquared <- summary(model)[["r.squared"]]

# Scatter plot of the same subset with two reference curves:
#   * gray dashed line - the straight-line (lm) fit, i.e. the model above
#   * default smoother - a flexible loess curve that follows the data
apts |>
  filter(in_sf == 0) |>
  ggplot(aes(x = elevation, y = price_per_sqft)) +
  geom_point() +
  geom_smooth(
    method = "lm",
    se = FALSE,
    color = "gray",
    linetype = "dashed"
  ) +
  geom_smooth(se = FALSE) +
  theme_classic() +
  labs(
    title = "price_per_sqft vs. elevation",
    subtitle = paste("Linear Fit R-Squared =", round(rsquared, 3))
  )
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'


  3. Price vs Elevation in New York
# Linear regression of price on elevation, fitted only to the rows where
# in_sf == 0.
model <- lm(price ~ elevation, data = filter(apts, in_sf == 0))

# R-squared of the linear fit; shown in the plot subtitle.
rsquared <- summary(model)[["r.squared"]]

# Scatter plot of the same subset with two reference curves:
#   * gray dashed line - the straight-line (lm) fit, i.e. the model above
#   * default smoother - a flexible loess curve that follows the data
apts |>
  filter(in_sf == 0) |>
  ggplot(aes(x = elevation, y = price)) +
  geom_point() +
  geom_smooth(
    method = "lm",
    se = FALSE,
    color = "gray",
    linetype = "dashed"
  ) +
  geom_smooth(se = FALSE) +
  theme_classic() +
  labs(
    title = "price vs. elevation",
    subtitle = paste("Linear Fit R-Squared =", round(rsquared, 3))
  )
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'


  4. Price vs year_built in New York
# Linear regression of price on construction year, fitted only to the rows
# where in_sf == 0.
model <- lm(price ~ year_built, data = filter(apts, in_sf == 0))

# R-squared of the linear fit; shown in the plot subtitle.
rsquared <- summary(model)[["r.squared"]]

# Scatter plot of the same subset with two reference curves:
#   * gray dashed line - the straight-line (lm) fit, i.e. the model above
#   * default smoother - a flexible loess curve that follows the data
apts |>
  filter(in_sf == 0) |>
  ggplot(aes(x = year_built, y = price)) +
  geom_point() +
  geom_smooth(
    method = "lm",
    se = FALSE,
    color = "gray",
    linetype = "dashed"
  ) +
  geom_smooth(se = FALSE) +
  theme_classic() +
  labs(
    title = "price vs. year_built",
    subtitle = paste("Linear Fit R-Squared =", round(rsquared, 3))
  )
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'