###1.
# Load the house-price data from the working directory. read.csv() already
# treats the first row as a header and uses "," as the separator by default.
HP <- read.csv("HousePrices.csv")
# Working copy of the raw table used by the rest of the script.
data1 <- data.frame(HP)
head(data1)
## X price lotsize bedrooms bathrooms stories driveway recreation fullbase
## 1 1 42000 5850 3 1 2 yes no yes
## 2 2 38500 4000 2 1 1 yes no no
## 3 3 49500 3060 3 1 1 yes no no
## 4 4 60500 6650 3 1 2 yes yes no
## 5 5 61000 6360 2 1 1 yes no no
## 6 6 66000 4160 3 1 1 yes yes yes
## gasheat aircon garage prefer
## 1 no no 1 no
## 2 no no 0 no
## 3 no no 0 no
## 4 no no 0 no
## 5 no no 0 no
## 6 no yes 0 no
##1.a
# Print per-column summary statistics for the full data set.
summary(data1)
## X price lotsize bedrooms
## Min. : 1.0 Min. : 25000 Min. : 1650 Min. :1.000
## 1st Qu.:137.2 1st Qu.: 49125 1st Qu.: 3600 1st Qu.:2.000
## Median :273.5 Median : 62000 Median : 4600 Median :3.000
## Mean :273.5 Mean : 68122 Mean : 5150 Mean :2.965
## 3rd Qu.:409.8 3rd Qu.: 82000 3rd Qu.: 6360 3rd Qu.:3.000
## Max. :546.0 Max. :190000 Max. :16200 Max. :6.000
## bathrooms stories driveway recreation
## Min. :1.000 Min. :1.000 Length:546 Length:546
## 1st Qu.:1.000 1st Qu.:1.000 Class :character Class :character
## Median :1.000 Median :2.000 Mode :character Mode :character
## Mean :1.286 Mean :1.808
## 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :4.000 Max. :4.000
## fullbase gasheat aircon garage
## Length:546 Length:546 Length:546 Min. :0.0000
## Class :character Class :character Class :character 1st Qu.:0.0000
## Mode :character Mode :character Mode :character Median :0.0000
## Mean :0.6923
## 3rd Qu.:1.0000
## Max. :3.0000
## prefer
## Length:546
## Class :character
## Mode :character
##
##
##
##1.b
# Mean and median of the `price` column over all rows.
mean(data1[["price"]])
## [1] 68121.6
median(data1[["price"]])
## [1] 62000
# Mean and median of the `lotsize` column over all rows.
mean(data1[["lotsize"]])
## [1] 5150.266
median(data1[["lotsize"]])
## [1] 4600
###2
# Keep only the first 20 rows of the full data set.
data2 <- data1[seq_len(20), ]
# Collect four columns into a new data frame; the columns are passed unnamed
# on purpose, so data.frame() derives the names "data2.price", etc.
data3 <- data.frame(
  data2$price,
  data2$lotsize,
  data2$bedrooms,
  data2$stories
)
head(data3)
## data2.price data2.lotsize data2.bedrooms data2.stories
## 1 42000 5850 3 2
## 2 38500 4000 2 1
## 3 49500 3060 3 1
## 4 60500 6650 3 2
## 5 61000 6360 2 1
## 6 66000 4160 3 1
###3
# Replace the auto-generated column names with short ones:
# P = price, L = lotsize, B = bedrooms, S = stories.
colnames(data3) <- c("P", "L", "B", "S")
head(data3)
## P L B S
## 1 42000 5850 3 2
## 2 38500 4000 2 1
## 3 49500 3060 3 1
## 4 60500 6650 3 2
## 5 61000 6360 2 1
## 6 66000 4160 3 1
###4
# Print per-column summary statistics for the 20-row subset.
summary(data3)
## P L B S
## Min. :27000 Min. :1700 Min. :1.00 Min. :1.0
## 1st Qu.:38350 1st Qu.:3271 1st Qu.:2.00 1st Qu.:1.0
## Median :45000 Median :3993 Median :3.00 Median :1.0
## Mean :52723 Mean :4296 Mean :2.65 Mean :1.6
## 3rd Qu.:66000 3rd Qu.:5275 3rd Qu.:3.00 3rd Qu.:2.0
## Max. :90000 Max. :7200 Max. :4.00 Max. :4.0
# Mean and median of price (P) in the 20-row subset.
mean(data3[["P"]])
## [1] 52722.5
median(data3[["P"]])
## [1] 45000
# Mean and median of lot size (L) in the 20-row subset.
mean(data3[["L"]])
## [1] 4296.05
median(data3[["L"]])
## [1] 3993
### The new data frame has a smaller mean and median than the first data frame because it is a smaller data set (only the first 20 rows); the mean and median are affected by which observations, and how many, are included in the data.
###5
# Convert the numeric story count into a labelled factor. cut() uses
# right-closed intervals by default, so the breaks 0..4 give (0,1], (1,2],
# (2,3], (3,4] and a story count of 1 maps to "firstfloor", 2 to
# "secondfloor", and so on.
# Fix: the second label was misspelled "secondfoor"; the recorded outputs
# below are updated to show the corrected label.
S1 <- cut(
  data3$S,
  breaks = 0:4,
  labels = c("firstfloor", "secondfloor", "thirdfloor", "fourthfloor")
)
S1
## [1] secondfloor firstfloor firstfloor secondfloor firstfloor firstfloor
## [7] secondfloor thirdfloor firstfloor fourthfloor firstfloor firstfloor
## [13] secondfloor firstfloor firstfloor firstfloor secondfloor thirdfloor
## [19] firstfloor firstfloor
## Levels: firstfloor secondfloor thirdfloor fourthfloor
# `new` is retained as an alias of S1 so that the same variables as before
# exist after this section runs.
new <- S1
# Append the factor to data3 as an extra column named S1.
data3$S1 <- new
head(data3)
## P L B S S1
## 1 42000 5850 3 2 secondfloor
## 2 38500 4000 2 1 firstfloor
## 3 49500 3060 3 1 firstfloor
## 4 60500 6650 3 2 secondfloor
## 5 61000 6360 2 1 firstfloor
## 6 66000 4160 3 1 firstfloor
###6
# Build data4 from price, lot size, bedrooms and the story factor (the numeric
# S column is left out), naming the columns directly in the constructor.
data4 <- data.frame(
  P2 = data3$P,
  L2 = data3$L,
  B2 = data3$B,
  S2 = data3$S1
)
head(data4)
## P2 L2 B2 S2
## 1 42000 5850 3 secondfloor
## 2 38500 4000 2 firstfloor
## 3 49500 3060 3 firstfloor
## 4 60500 6650 3 secondfloor
## 5 61000 6360 2 firstfloor
## 6 66000 4160 3 firstfloor
###7 bonus question
# Bonus: read HousePrices.csv straight from its raw GitHub URL instead of a
# local file.
data5 <- read.csv(
  file = "https://raw.githubusercontent.com/Cnadour/HW2/main/HousePrices.csv"
)
head(data5)
## X price lotsize bedrooms bathrooms stories driveway recreation fullbase
## 1 1 42000 5850 3 1 2 yes no yes
## 2 2 38500 4000 2 1 1 yes no no
## 3 3 49500 3060 3 1 1 yes no no
## 4 4 60500 6650 3 1 2 yes yes no
## 5 5 61000 6360 2 1 1 yes no no
## 6 6 66000 4160 3 1 1 yes yes yes
## gasheat aircon garage prefer
## 1 no no 1 no
## 2 no no 0 no
## 3 no no 0 no
## 4 no no 0 no
## 5 no no 0 no
## 6 no yes 0 no