R Bridge Course Final Project This is a final project to show off what you have learned. Select your data set from the list below: http://vincentarelbundock.github.io/Rdatasets/ (click on the csv index for a list). Another good source is found here: https://archive.ics.uci.edu/ml/datasets.html The presentation approach is up to you but it should contain the following: #5. BONUS - place the original .csv in a GitHub file and have R read from the link. This will be a very useful skill as you progress in your data science education and career.

# Load the Ecdat Housing data set straight from GitHub. read.csv() accepts a
# URL directly, so no explicit url() connection is needed.
# stringsAsFactors = TRUE keeps the yes/no columns as factors on every R
# version (the default changed to FALSE in R 4.0), so summary() reports the
# per-level counts instead of just "Length / Class / Mode".
rdata <- read.csv(
  "https://raw.githubusercontent.com/vincentarelbundock/Rdatasets/master/csv/Ecdat/Housing.csv",
  stringsAsFactors = TRUE
)

1. Data Exploration: This should include summary statistics, means, medians, quartiles, or any other relevant information about the data set. Please include some conclusions in the R Markdown text.

# Per-column overview of the whole data set: min / quartiles / mean / max for
# the numeric columns and per-level counts for the yes/no columns.
summary(rdata)
##        X             price           lotsize         bedrooms    
##  Min.   :  1.0   Min.   : 25000   Min.   : 1650   Min.   :1.000  
##  1st Qu.:137.2   1st Qu.: 49125   1st Qu.: 3600   1st Qu.:2.000  
##  Median :273.5   Median : 62000   Median : 4600   Median :3.000  
##  Mean   :273.5   Mean   : 68122   Mean   : 5150   Mean   :2.965  
##  3rd Qu.:409.8   3rd Qu.: 82000   3rd Qu.: 6360   3rd Qu.:3.000  
##  Max.   :546.0   Max.   :190000   Max.   :16200   Max.   :6.000  
##     bathrms         stories      driveway  recroom   fullbase  gashw    
##  Min.   :1.000   Min.   :1.000   no : 77   no :449   no :355   no :521  
##  1st Qu.:1.000   1st Qu.:1.000   yes:469   yes: 97   yes:191   yes: 25  
##  Median :1.000   Median :2.000                                          
##  Mean   :1.286   Mean   :1.808                                          
##  3rd Qu.:2.000   3rd Qu.:2.000                                          
##  Max.   :4.000   Max.   :4.000                                          
##  airco        garagepl      prefarea 
##  no :373   Min.   :0.0000   no :418  
##  yes:173   1st Qu.:0.0000   yes:128  
##            Median :0.0000            
##            Mean   :0.6923            
##            3rd Qu.:1.0000            
##            Max.   :3.0000
# Number of houses without / with a driveway.
table(rdata$driveway)
## 
##  no yes 
##  77 469
# Number of houses for each value of garagepl.
table(rdata$garagepl)
## 
##   0   1   2   3 
## 300 126 108  12
# Three-way cross-tabulation. Rows are bedrooms, columns are bathrms, and one
# slice is printed per driveway level. The headers below are unlabeled
# (", ,  = no") because the arguments are passed as unnamed `$` expressions.
table(rdata$bedrooms,rdata$bathrms,rdata$driveway)
## , ,  = no
## 
##    
##       1   2   3   4
##   1   1   0   0   0
##   2  18   2   0   0
##   3  34   4   1   0
##   4   6   6   1   0
##   5   2   2   0   0
##   6   0   0   0   0
## 
## , ,  = yes
## 
##    
##       1   2   3   4
##   1   1   0   0   0
##   2 110   6   0   0
##   3 191  68   3   0
##   4  36  42   3   1
##   5   2   2   2   0
##   6   1   1   0   0

2. Data wrangling: Please perform some basic transformations. They will need to make sense but could include column renaming, creating a subset of the data, replacing values, or creating new columns with derived data (for example - if it makes sense you could sum two columns together)

# Price, bedroom and bathroom columns pulled out of the full data set. The
# explicit names are the same ones data.frame() would generate on its own.
bbath <- data.frame(
  rdata.price = rdata$price,
  rdata.bedrooms = rdata$bedrooms,
  rdata.bathrms = rdata$bathrms
)

# Houses priced at 100,000 or more, with short column names.
bbath1 <- setNames(
  bbath[bbath$rdata.price >= 100000, ],
  c("Price", "Bed", "Bath")
)

# Price and bedroom count for every house, with short column names.
bbath2 <- setNames(
  bbath[c("rdata.price", "rdata.bedrooms")],
  c("Price", "Bed")
)

# Summary statistics for price and bedroom count.
summary(bbath2)
##      Price             Bed       
##  Min.   : 25000   Min.   :1.000  
##  1st Qu.: 49125   1st Qu.:2.000  
##  Median : 62000   Median :3.000  
##  Mean   : 68122   Mean   :2.965  
##  3rd Qu.: 82000   3rd Qu.:3.000  
##  Max.   :190000   Max.   :6.000
# Mean and median house price.
mean(bbath2[["Price"]])
## [1] 68121.6
median(bbath2[["Price"]])
## [1] 62000
# Four-bedroom, two-story houses priced above 100,000. The column names are
# resolved inside rdata by subset().
n.sub <- subset(rdata, price > 100000 & bedrooms == 4 & stories == 2)
print(n.sub)
##       X  price lotsize bedrooms bathrms stories driveway recroom fullbase
## 93   93 163000    7420        4       1       2      yes     yes      yes
## 104 104 132000    3500        4       2       2      yes      no       no
## 162 162 130000    6000        4       1       2      yes      no      yes
## 217 217 138300    6000        4       3       2      yes     yes      yes
## 339 339 141000    8100        4       1       2      yes     yes      yes
## 361 361 130000    6600        4       2       2      yes     yes      yes
## 374 374 122000    6540        4       2       2      yes     yes      yes
## 376 376 133000    6550        4       2       2      yes      no       no
## 419 419 174500    7500        4       2       2      yes      no      yes
## 446 446 104900   11440        4       1       2      yes      no      yes
## 448 448 120000    5500        4       2       2      yes      no      yes
## 486 486 118500    4880        4       2       2      yes      no       no
## 519 519 101000    6240        4       2       2      yes      no       no
##     gashw airco garagepl prefarea
## 93     no   yes        2       no
## 104   yes    no        2       no
## 162    no    no        2       no
## 217   yes    no        2       no
## 339    no   yes        2      yes
## 361    no   yes        1      yes
## 374    no   yes        2      yes
## 376    no   yes        1      yes
## 419    no   yes        3      yes
## 446    no    no        1      yes
## 448    no   yes        1      yes
## 486    no   yes        1      yes
## 519    no   yes        1       no

3. Graphics: Please make sure to display at least one scatter plot, box plot and histogram. Don’t be limited to this. Please explore the many other options in R packages such as ggplot2.

# Scatter plot of bedroom count against price for every house in bbath2.
with(
  bbath2,
  plot(
    Price, Bed,
    xlab = "Prices of House",
    ylab = "# of Bedrooms",
    main = "Prices of House vs. # of Bedrooms",
    col = "green"
  )
)

# Scatter plot of bedroom count against price for the houses in bbath1
# (those priced at 100,000 or more).
plot(
  bbath1$Price, bbath1$Bed,
  xlab = "Prices of House",
  ylab = "# of Bedrooms",
  main = "Prices of House vs. # of Bedrooms",
  col = "blue"
)

# Highlight the three-bedroom houses with red squares. The y coordinate must
# be the bedroom count, because that is what the y axis of this plot shows;
# using the bathroom count would draw the markers on the wrong rows. Bed is
# numeric, so it is compared with the number 3 rather than the string "3".
with(
  bbath1[bbath1$Bed == 3, ],
  points(Price, Bed, pch = 15, col = "red")
)

# Histogram of house prices, drawn on the count scale (houses per price bin).
histp <- hist(
  rdata$price,
  freq = TRUE,
  xlab = "Price Range",
  ylab = "# of Houses",
  main = "Price of Houses vs. # of Houses",
  col = "yellow"
)

# Overlay a normal curve using the sample mean and standard deviation.
# dnorm() returns a density, so it is multiplied by (number of houses * bin
# width) to put it on the same count scale as the histogram; without this the
# curve is squashed flat against the x axis. hist() uses equal-width bins by
# default, so the first bin width applies to all bins.
curve(
  dnorm(x, mean = mean(rdata$price), sd = sd(rdata$price)) *
    length(rdata$price) * diff(histp$breaks)[1],
  add = TRUE, col = "magenta", lwd = 2
)

# Box plot: distribution of house price for each bedroom count.
boxplot(
  price ~ bedrooms,
  data = rdata,
  main = toupper("Prices of House vs. # of Bedrooms"),
  font.main = 3,
  cex.main = 1.2,
  xlab = "# of Bedrooms",
  ylab = "Prices of House",
  font.lab = 3,
  col = "orangered"
)

#4. Meaningful question for analysis: Please state at the beginning a meaningful question for analysis. Use the first three steps and anything else that would be helpful to answer the question you are posing from the data set you chose. Please write a brief conclusion paragraph in R markdown at the end.

library(ggplot2)
# Jittered scatter plots of price against bedroom and bathroom counts.
# aes() refers to columns of the `data` argument by bare name; writing
# rdata$price inside aes() triggers a ggplot2 warning and yields axis titles
# such as "rdata$price".
ggplot(rdata, aes(x = price, y = bedrooms)) +
  geom_jitter(size = 2, color = "magenta") +
  labs(x = "Price", y = "Bedrooms")

ggplot(rdata, aes(x = price, y = bathrms)) +
  geom_jitter(size = 2, color = "darkgreen") +
  labs(x = "Price", y = "Bathrooms")

#Conclusion: Based on the graphs, a buyer can see how the number of bedrooms, bathrooms, and stories varies across price ranges, which helps them make an informed decision for a specific budget.