library(plotly) # for interactive visuals
## Warning: package 'plotly' was built under R version 4.1.2
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.1.2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(ggplot2) # for static visuals
library(tidyr) # for data tidying
## Warning: package 'tidyr' was built under R version 4.1.2
library(stringr) # to process character strings
## Warning: package 'stringr' was built under R version 4.1.2
library(forcats)# to process categorical data
## Warning: package 'forcats' was built under R version 4.1.2
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.1.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#library(MASS) # used for 3D surface plots
#library(splines) # for more complex models
#library(mgcv) # for more complex models
Create an R Markdown document and delete the existing sections/code chunks, then add a section (defined by ## Question 1). In a code chunk within this section, load the AmesHousing data and use plotly to create a violin plot displaying the distribution of sale prices for each different house style in the Ames housing data. (Hint: you should use the reference page to learn how to add a violin plot trace).
# Question 1: read the Ames housing data from the ds4stats tutorial repository
# and draw one violin of SalePrice per HouseStyle, coloured by HouseStyle.
AmesHousing <- read.csv(
  "https://raw.githubusercontent.com/ds4stats/r-tutorials/master/data-viz/data/AmesHousing.csv"
)
AmesHousing %>%
  plot_ly(
    x = ~HouseStyle,
    y = ~SalePrice,
    color = ~HouseStyle,
    type = "violin"
  )
For Question #2, modify this code to include custom hovertext displaying the house style and that style’s median sale price on separate lines. (Hint: you should use the group_by and summarize functions in the dplyr package to get the information needed for your labels)
# Fraction of all rows in AmesHousing that belong to each HouseStyle, and the
# style labels in the order table() reports them.
prop <- table(AmesHousing$HouseStyle) / nrow(AmesHousing)
style <- names(prop)

# Median SalePrice within each HouseStyle (missing prices are dropped before
# taking the median). Printing the result shows one row per style.
housing_by_style <- AmesHousing %>% group_by(HouseStyle)
housingsum <- housing_by_style %>%
  summarize(median = median(SalePrice, na.rm = TRUE))
housingsum
## # A tibble: 8 x 2
## HouseStyle median
## <chr> <dbl>
## 1 1.5Fin 129675
## 2 1.5Unf 113000
## 3 1Story 155000
## 4 2.5Fin 194000
## 5 2.5Unf 160950
## 6 2Story 189000
## 7 SFoyer 143000
## 8 SLvl 165000
# Pie chart of each house style's share of rows, with hover text giving the
# style and its median sale price on separate lines.
#
# The label, share, and median are gathered into one data frame and matched by
# style name. Passing AmesHousing as `data` while pointing the formulas at
# separately computed global vectors only worked because table() and
# group_by() happen to order the styles the same way.
style_summary <- data.frame(
  HouseStyle = as.character(housingsum$HouseStyle),
  median = housingsum$median,
  # Look shares up by name so each share is tied to its own style; as.numeric()
  # drops the table class before the values are handed to plotly.
  share = as.numeric(prop[as.character(housingsum$HouseStyle)])
)

plot_ly(data = style_summary) %>%
  add_trace(
    type = "pie",
    labels = ~HouseStyle,
    values = ~share,
    hoverinfo = "text",
    text = ~paste(
      "House Style:", HouseStyle, "<br>", "Median Sale Price:", median
    )
  )
# Fit SalePrice as a linear function of LotArea and GrLivArea, then draw the
# fitted plane as a 3D surface with the observed houses overlaid as markers.
model <- lm(SalePrice ~ LotArea + GrLivArea, data = AmesHousing)

# Evaluate the plane over the observed range of each predictor on a modest
# 100 x 100 grid. The previous fixed grid (0 to 2,000,000 in steps of 200)
# produced over a million predictions and a 10001 x 101 matrix for the browser.
xs <- seq(
  min(AmesHousing$LotArea, na.rm = TRUE),
  max(AmesHousing$LotArea, na.rm = TRUE),
  length.out = 100
)
ys <- seq(
  min(AmesHousing$GrLivArea, na.rm = TRUE),
  max(AmesHousing$GrLivArea, na.rm = TRUE),
  length.out = 100
)
grid <- expand.grid(LotArea = xs, GrLivArea = ys)
z <- predict(model, newdata = grid)

# plotly surface traces read z as z[row = y value, column = x value].
# expand.grid() varies its first variable (LotArea) fastest, so filling the
# matrix by row puts one GrLivArea value on each row and one LotArea value on
# each column, which is the orientation plotly expects.
m <- matrix(z, nrow = length(ys), ncol = length(xs), byrow = TRUE)

plot_ly() %>%
  add_surface(x = ~xs, y = ~ys, z = ~m, colors = c("#d1d1d1", "#000000")) %>%
  add_markers(
    data = AmesHousing,
    x = ~LotArea,
    y = ~GrLivArea,
    z = ~SalePrice,
    # `color = I(...)` sets a constant marker colour; `colors` is only a
    # palette for mapped colours and had no effect here.
    color = I("green")
  )