###1.
# Load the house-price data from a CSV file in the working directory
# (first row holds the column names, fields are comma-separated).
HP <- read.csv(file = "HousePrices.csv", header = TRUE, sep = ",")
# Work on a data-frame copy so the raw import stays available as HP.
data1 <- data.frame(HP)
# Preview the first rows to confirm the import.
head(data1)
## X price lotsize bedrooms bathrooms stories driveway recreation fullbase
## 1 1 42000 5850 3 1 2 yes no yes
## 2 2 38500 4000 2 1 1 yes no no
## 3 3 49500 3060 3 1 1 yes no no
## 4 4 60500 6650 3 1 2 yes yes no
## 5 5 61000 6360 2 1 1 yes no no
## 6 6 66000 4160 3 1 1 yes yes yes
## gasheat aircon garage prefer
## 1 no no 1 no
## 2 no no 0 no
## 3 no no 0 no
## 4 no no 0 no
## 5 no no 0 no
## 6 no yes 0 no
##1.a
# Per-column summary of the full data set. As the output below shows, numeric
# columns report min / quartiles / mean / max, while the yes/no columns are
# character vectors and only report length, class and mode.
summary(data1)
## X price lotsize bedrooms
## Min. : 1.0 Min. : 25000 Min. : 1650 Min. :1.000
## 1st Qu.:137.2 1st Qu.: 49125 1st Qu.: 3600 1st Qu.:2.000
## Median :273.5 Median : 62000 Median : 4600 Median :3.000
## Mean :273.5 Mean : 68122 Mean : 5150 Mean :2.965
## 3rd Qu.:409.8 3rd Qu.: 82000 3rd Qu.: 6360 3rd Qu.:3.000
## Max. :546.0 Max. :190000 Max. :16200 Max. :6.000
## bathrooms stories driveway recreation
## Min. :1.000 Min. :1.000 Length:546 Length:546
## 1st Qu.:1.000 1st Qu.:1.000 Class :character Class :character
## Median :1.000 Median :2.000 Mode :character Mode :character
## Mean :1.286 Mean :1.808
## 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :4.000 Max. :4.000
## fullbase gasheat aircon garage
## Length:546 Length:546 Length:546 Min. :0.0000
## Class :character Class :character Class :character 1st Qu.:0.0000
## Mode :character Mode :character Mode :character Median :0.0000
## Mean :0.6923
## 3rd Qu.:1.0000
## Max. :3.0000
## prefer
## Length:546
## Class :character
## Mode :character
##
##
##
##1.b
# Central tendency of sale price and lot size over all observations.
mean(data1[["price"]])
## [1] 68121.6
median(data1[["price"]])
## [1] 62000
mean(data1[["lotsize"]])
## [1] 5150.266
median(data1[["lotsize"]])
## [1] 4600
###2
# Keep only the first 20 observations, with every column retained.
data2 <- data1[seq_len(20), ]
data2
## X price lotsize bedrooms bathrooms stories driveway recreation fullbase
## 1 1 42000 5850 3 1 2 yes no yes
## 2 2 38500 4000 2 1 1 yes no no
## 3 3 49500 3060 3 1 1 yes no no
## 4 4 60500 6650 3 1 2 yes yes no
## 5 5 61000 6360 2 1 1 yes no no
## 6 6 66000 4160 3 1 1 yes yes yes
## 7 7 66000 3880 3 2 2 yes no yes
## 8 8 69000 4160 3 1 3 yes no no
## 9 9 83800 4800 3 1 1 yes yes yes
## 10 10 88500 5500 3 2 4 yes yes no
## 11 11 90000 7200 3 2 1 yes no yes
## 12 12 30500 3000 2 1 1 no no no
## 13 13 27000 1700 3 1 2 yes no no
## 14 14 36000 2880 3 1 1 no no no
## 15 15 37000 3600 2 1 1 yes no no
## 16 16 37900 3185 2 1 1 yes no no
## 17 17 40500 3300 3 1 2 no no no
## 18 18 40750 5200 4 1 3 yes no no
## 19 19 45000 3450 1 1 1 yes no no
## 20 20 45000 3986 2 2 1 no yes yes
## gasheat aircon garage prefer
## 1 no no 1 no
## 2 no no 0 no
## 3 no no 0 no
## 4 no no 0 no
## 5 no no 0 no
## 6 no yes 0 no
## 7 no no 2 no
## 8 no no 0 no
## 9 no no 0 no
## 10 no yes 1 no
## 11 no yes 3 no
## 12 no no 0 no
## 13 no no 0 no
## 14 no no 0 no
## 15 no no 0 no
## 16 no yes 0 no
## 17 no no 1 no
## 18 no no 0 no
## 19 no no 0 no
## 20 no no 1 no
# Reduce the 20-row subset to the four variables of interest. The column names
# are spelled out explicitly and match the ones data.frame() would derive from
# the `data2$...` expressions.
data3 <- data.frame(
  data2.price = data2$price,
  data2.lotsize = data2$lotsize,
  data2.bedrooms = data2$bedrooms,
  data2.stories = data2$stories
)
data3
## data2.price data2.lotsize data2.bedrooms data2.stories
## 1 42000 5850 3 2
## 2 38500 4000 2 1
## 3 49500 3060 3 1
## 4 60500 6650 3 2
## 5 61000 6360 2 1
## 6 66000 4160 3 1
## 7 66000 3880 3 2
## 8 69000 4160 3 3
## 9 83800 4800 3 1
## 10 88500 5500 3 4
## 11 90000 7200 3 1
## 12 30500 3000 2 1
## 13 27000 1700 3 2
## 14 36000 2880 3 1
## 15 37000 3600 2 1
## 16 37900 3185 2 1
## 17 40500 3300 3 2
## 18 40750 5200 4 3
## 19 45000 3450 1 1
## 20 45000 3986 2 1
###3
# Shorten the column names: P = price, L = lot size, B = bedrooms, S = stories.
colnames(data3) <- c("P", "L", "B", "S")
data3
## P L B S
## 1 42000 5850 3 2
## 2 38500 4000 2 1
## 3 49500 3060 3 1
## 4 60500 6650 3 2
## 5 61000 6360 2 1
## 6 66000 4160 3 1
## 7 66000 3880 3 2
## 8 69000 4160 3 3
## 9 83800 4800 3 1
## 10 88500 5500 3 4
## 11 90000 7200 3 1
## 12 30500 3000 2 1
## 13 27000 1700 3 2
## 14 36000 2880 3 1
## 15 37000 3600 2 1
## 16 37900 3185 2 1
## 17 40500 3300 3 2
## 18 40750 5200 4 3
## 19 45000 3450 1 1
## 20 45000 3986 2 1
###4
# Per-column summary (min, quartiles, mean, max) of the 20-row subset; all four
# columns (P, L, B, S) are numeric.
summary(data3)
## P L B S
## Min. :27000 Min. :1700 Min. :1.00 Min. :1.0
## 1st Qu.:38350 1st Qu.:3271 1st Qu.:2.00 1st Qu.:1.0
## Median :45000 Median :3993 Median :3.00 Median :1.0
## Mean :52723 Mean :4296 Mean :2.65 Mean :1.6
## 3rd Qu.:66000 3rd Qu.:5275 3rd Qu.:3.00 3rd Qu.:2.0
## Max. :90000 Max. :7200 Max. :4.00 Max. :4.0
# Mean and median of price (P) and lot size (L) in the 20-row subset, for
# comparison with the full-data values computed in 1.b.
mean(data3[["P"]])
## [1] 52722.5
median(data3[["P"]])
## [1] 45000
mean(data3[["L"]])
## [1] 4296.05
median(data3[["L"]])
## [1] 3993
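### The 20-row data frame has a smaller mean and median (for both price and
### lot size) than the full data frame. The number of rows does not by itself
### lower these statistics: the subset is simply the first 20 rows rather than
### a random sample, and those rows happen to contain cheaper houses on smaller
### lots, so its mean and median do not match those of the whole data set.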
###5
# Recode the numeric number of stories (S) as a labelled factor. cut() uses
# right-closed intervals, so (0,1] -> "firstfloor", (1,2] -> "secondfloor",
# (2,3] -> "thirdfloor", (3,4] -> "fourthfloor"; a value outside (0, 4] would
# become NA. The misspelled level "secondfoor" is corrected to "secondfloor".
S1 <- cut(
  data3$S,
  breaks = 0:4,
  labels = c("firstfloor", "secondfloor", "thirdfloor", "fourthfloor")
)
S1
## [1] secondfloor firstfloor firstfloor secondfloor firstfloor firstfloor
## [7] secondfloor thirdfloor firstfloor fourthfloor firstfloor firstfloor
## [13] secondfloor firstfloor firstfloor firstfloor secondfloor thirdfloor
## [19] firstfloor firstfloor
## Levels: firstfloor secondfloor thirdfloor fourthfloor
# `new` is retained only so any later code that refers to it keeps working; it
# is a plain alias of S1.
new <- S1
# Append the labelled factor as an extra column next to the numeric S.
data3$S1 <- S1
data3
## P L B S S1
## 1 42000 5850 3 2 secondfloor
## 2 38500 4000 2 1 firstfloor
## 3 49500 3060 3 1 firstfloor
## 4 60500 6650 3 2 secondfloor
## 5 61000 6360 2 1 firstfloor
## 6 66000 4160 3 1 firstfloor
## 7 66000 3880 3 2 secondfloor
## 8 69000 4160 3 3 thirdfloor
## 9 83800 4800 3 1 firstfloor
## 10 88500 5500 3 4 fourthfloor
## 11 90000 7200 3 1 firstfloor
## 12 30500 3000 2 1 firstfloor
## 13 27000 1700 3 2 secondfloor
## 14 36000 2880 3 1 firstfloor
## 15 37000 3600 2 1 firstfloor
## 16 37900 3185 2 1 firstfloor
## 17 40500 3300 3 2 secondfloor
## 18 40750 5200 4 3 thirdfloor
## 19 45000 3450 1 1 firstfloor
## 20 45000 3986 2 1 firstfloor
###6
# Final data frame: price, lot size, bedrooms and the labelled stories factor
# (the numeric S column is dropped), with the columns named P2, L2, B2, S2 at
# construction time.
data4 <- data.frame(
  P2 = data3$P,
  L2 = data3$L,
  B2 = data3$B,
  S2 = data3$S1
)
data4
## P2 L2 B2 S2
## 1 42000 5850 3 secondfloor
## 2 38500 4000 2 firstfloor
## 3 49500 3060 3 firstfloor
## 4 60500 6650 3 secondfloor
## 5 61000 6360 2 firstfloor
## 6 66000 4160 3 firstfloor
## 7 66000 3880 3 secondfloor
## 8 69000 4160 3 thirdfloor
## 9 83800 4800 3 firstfloor
## 10 88500 5500 3 fourthfloor
## 11 90000 7200 3 firstfloor
## 12 30500 3000 2 firstfloor
## 13 27000 1700 3 secondfloor
## 14 36000 2880 3 firstfloor
## 15 37000 3600 2 firstfloor
## 16 37900 3185 2 firstfloor
## 17 40500 3300 3 secondfloor
## 18 40750 5200 4 thirdfloor
## 19 45000 3450 1 firstfloor
## 20 45000 3986 2 firstfloor
###7 bonus question
# Read the CSV directly from its raw GitHub URL instead of a local file, then
# preview the first rows to confirm the download parsed correctly.
data5 <- read.csv(
  file = "https://raw.githubusercontent.com/Cnadour/HW2/main/HousePrices.csv"
)
head(data5)
## X price lotsize bedrooms bathrooms stories driveway recreation fullbase
## 1 1 42000 5850 3 1 2 yes no yes
## 2 2 38500 4000 2 1 1 yes no no
## 3 3 49500 3060 3 1 1 yes no no
## 4 4 60500 6650 3 1 2 yes yes no
## 5 5 61000 6360 2 1 1 yes no no
## 6 6 66000 4160 3 1 1 yes yes yes
## gasheat aircon garage prefer
## 1 no no 1 no
## 2 no no 0 no
## 3 no no 0 no
## 4 no no 0 no
## 5 no no 0 no
## 6 no yes 0 no