R Bridge Course Final Project. This is a final project to show off what you have learned. Select your data set from the list below: http://vincentarelbundock.github.io/Rdatasets/ (click on the csv index for a list). Another good source is found here: https://archive.ics.uci.edu/ml/datasets.html The presentation approach is up to you, but it should contain the following: #5. BONUS - place the original .csv in a GitHub file and have R read from the link. This will be a very useful skill as you progress in your data science education and career.
# Load the Ecdat "Housing" data set directly from its raw GitHub URL.
# stringsAsFactors = TRUE is set explicitly: since R 4.0.0 read.csv() keeps
# text columns as character by default, and summary() would then report only
# Length/Class/Mode for the yes/no columns instead of the per-level counts.
housing_url <- paste0(
  "https://raw.githubusercontent.com/vincentarelbundock/Rdatasets/",
  "master/csv/Ecdat/Housing.csv"
)
rdata <- read.csv(housing_url, stringsAsFactors = TRUE)

# Per-column overview: quartiles for numeric columns, counts for factors.
summary(rdata)
## X price lotsize bedrooms
## Min. : 1.0 Min. : 25000 Min. : 1650 Min. :1.000
## 1st Qu.:137.2 1st Qu.: 49125 1st Qu.: 3600 1st Qu.:2.000
## Median :273.5 Median : 62000 Median : 4600 Median :3.000
## Mean :273.5 Mean : 68122 Mean : 5150 Mean :2.965
## 3rd Qu.:409.8 3rd Qu.: 82000 3rd Qu.: 6360 3rd Qu.:3.000
## Max. :546.0 Max. :190000 Max. :16200 Max. :6.000
## bathrms stories driveway recroom fullbase gashw
## Min. :1.000 Min. :1.000 no : 77 no :449 no :355 no :521
## 1st Qu.:1.000 1st Qu.:1.000 yes:469 yes: 97 yes:191 yes: 25
## Median :1.000 Median :2.000
## Mean :1.286 Mean :1.808
## 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :4.000 Max. :4.000
## airco garagepl prefarea
## no :373 Min. :0.0000 no :418
## yes:173 1st Qu.:0.0000 yes:128
## Median :0.0000
## Mean :0.6923
## 3rd Qu.:1.0000
## Max. :3.0000
# Count of houses for each value of the driveway column.
table(rdata[["driveway"]])
##
## no yes
## 77 469
# Count of houses for each value of the garagepl column.
table(rdata[["garagepl"]])
##
## 0 1 2 3
## 300 126 108 12
# Three-way contingency table: bedrooms (rows) by bathrms (columns),
# printed as one slice per driveway level.
table(
  rdata[["bedrooms"]],
  rdata[["bathrms"]],
  rdata[["driveway"]]
)
## , , = no
##
##
## 1 2 3 4
## 1 1 0 0 0
## 2 18 2 0 0
## 3 34 4 1 0
## 4 6 6 1 0
## 5 2 2 0 0
## 6 0 0 0 0
##
## , , = yes
##
##
## 1 2 3 4
## 1 1 0 0 0
## 2 110 6 0 0
## 3 191 68 3 0
## 4 36 42 3 1
## 5 2 2 2 0
## 6 1 1 0 0
# Price / bedrooms / bathrooms extracted from the full data set.
bbath <- data.frame(rdata$price, rdata$bedrooms, rdata$bathrms)

# bbath1: only the houses priced at 100000 or more, with short column names.
bbath1 <- setNames(
  bbath[rdata$price >= 100000, ],
  c("Price", "Bed", "Bath")
)

# bbath2: price and bedroom count for every house, with short column names.
bbath2 <- setNames(bbath[1:2], c("Price", "Bed"))

summary(bbath2)
## Price Bed
## Min. : 25000 Min. :1.000
## 1st Qu.: 49125 1st Qu.:2.000
## Median : 62000 Median :3.000
## Mean : 68122 Mean :2.965
## 3rd Qu.: 82000 3rd Qu.:3.000
## Max. :190000 Max. :6.000
# Mean of the Price column over all houses.
mean(bbath2[["Price"]])
## [1] 68121.6
# Median of the Price column over all houses.
median(bbath2[["Price"]])
## [1] 62000
# Houses priced above 100000 that have exactly 4 bedrooms and 2 stories.
# subset() evaluates the condition inside rdata, so bare column names suffice.
n.sub <- subset(
  rdata,
  price > 100000 & bedrooms == 4 & stories == 2
)
n.sub
## X price lotsize bedrooms bathrms stories driveway recroom fullbase
## 93 93 163000 7420 4 1 2 yes yes yes
## 104 104 132000 3500 4 2 2 yes no no
## 162 162 130000 6000 4 1 2 yes no yes
## 217 217 138300 6000 4 3 2 yes yes yes
## 339 339 141000 8100 4 1 2 yes yes yes
## 361 361 130000 6600 4 2 2 yes yes yes
## 374 374 122000 6540 4 2 2 yes yes yes
## 376 376 133000 6550 4 2 2 yes no no
## 419 419 174500 7500 4 2 2 yes no yes
## 446 446 104900 11440 4 1 2 yes no yes
## 448 448 120000 5500 4 2 2 yes no yes
## 486 486 118500 4880 4 2 2 yes no no
## 519 519 101000 6240 4 2 2 yes no no
## gashw airco garagepl prefarea
## 93 no yes 2 no
## 104 yes no 2 no
## 162 no no 2 no
## 217 yes no 2 no
## 339 no yes 2 yes
## 361 no yes 1 yes
## 374 no yes 2 yes
## 376 no yes 1 yes
## 419 no yes 3 yes
## 446 no no 1 yes
## 448 no yes 1 yes
## 486 no yes 1 yes
## 519 no yes 1 no
# Scatter plot of bedroom count against price for every house.
plot(
  x = bbath2[["Price"]],
  y = bbath2[["Bed"]],
  xlab = "Prices of House",
  ylab = "# of Bedrooms",
  main = "Prices of House vs. # of Bedrooms",
  col = "green"
)
# Scatter plot of bedroom count against price for the houses in bbath1,
# with the three-bedroom houses highlighted as red squares.
plot(
  bbath1$Price, bbath1$Bed,
  xlab = "Prices of House", ylab = "# of Bedrooms",
  main = "Prices of House vs. # of Bedrooms", col = "blue"
)
# The overlay must use the same y variable as the base plot (Bed); plotting
# Bath here would place bathroom counts on the bedroom axis. Bed is numeric,
# so it is compared against 3 rather than the string "3".
three_bed <- bbath1$Bed == 3
points(bbath1$Price[three_bed], bbath1$Bed[three_bed], pch = 15, col = "red")
# Histogram of house prices (bar height = number of houses) with a fitted
# normal curve overlaid.
histp <- hist(
  rdata$price,
  freq = TRUE,
  xlab = "Price Range",
  ylab = "# of Houses",
  main = "Price of Houses vs. # of Houses",
  col = "yellow"
)
# dnorm() returns a density, but the bars above are counts. Multiplying the
# density by (number of observations * bin width) puts the curve on the same
# scale as the bars; unscaled, it would lie flat along the x-axis.
curve(
  dnorm(x, mean = mean(rdata$price), sd = sd(rdata$price)) *
    sum(histp$counts) * diff(histp$breaks)[1],
  add = TRUE, col = "magenta", lwd = 2
)
# Box plot of price grouped by bedroom count.
# The formula's variables are looked up in `data`, so bare column names are
# enough; font.main / font.lab = 3 request the italic face for the labels.
boxplot(
  price ~ bedrooms,
  data = rdata,
  main = toupper("Prices of House vs. # of Bedrooms"),
  font.main = 3,
  cex.main = 1.2,
  xlab = "# of Bedrooms",
  ylab = "Prices of House",
  font.lab = 3,
  col = "orangered"
)
#4. Meaningful question for analysis: Please state at the beginning a meaningful question for analysis. Use the first three steps and anything else that would be helpful to answer the question you are posing from the data set you chose. Please write a brief conclusion paragraph in R markdown at the end.
library(ggplot2)
# Jittered scatter plots of price against bedroom and bathroom counts.
# Columns are referenced by bare name inside aes(): ggplot2 looks them up in
# the `data` argument, whereas rdata$... bypasses that lookup, is flagged as
# discouraged by ggplot2, and leaks "rdata$price" into the axis labels.
ggplot(rdata, aes(x = price, y = bedrooms)) +
  geom_jitter(size = 2, color = "magenta")
ggplot(rdata, aes(x = price, y = bathrms)) +
  geom_jitter(size = 2, color = "darkgreen")
#Conclusion: Based on the graphs, customers can identify and visualize how the number of bedrooms, bathrooms, and stories varies within a specific budget range, which helps them make an effective decision.