###1.

# Load the house-price data from the local CSV file and preview the first rows.
# header = TRUE and sep = "," are the read.csv() defaults, so they are implied.
HP <- read.csv(file = "HousePrices.csv")
data1 <- data.frame(HP)
head(data1, n = 6L)
##   X price lotsize bedrooms bathrooms stories driveway recreation fullbase
## 1 1 42000    5850        3         1       2      yes         no      yes
## 2 2 38500    4000        2         1       1      yes         no       no
## 3 3 49500    3060        3         1       1      yes         no       no
## 4 4 60500    6650        3         1       2      yes        yes       no
## 5 5 61000    6360        2         1       1      yes         no       no
## 6 6 66000    4160        3         1       1      yes        yes      yes
##   gasheat aircon garage prefer
## 1      no     no      1     no
## 2      no     no      0     no
## 3      no     no      0     no
## 4      no     no      0     no
## 5      no     no      0     no
## 6      no    yes      0     no
##1.a

# Per-column summary of the full data set (quartiles and mean for numeric
# columns; length/class/mode for character columns).
summary(object = data1)
##        X             price           lotsize         bedrooms    
##  Min.   :  1.0   Min.   : 25000   Min.   : 1650   Min.   :1.000  
##  1st Qu.:137.2   1st Qu.: 49125   1st Qu.: 3600   1st Qu.:2.000  
##  Median :273.5   Median : 62000   Median : 4600   Median :3.000  
##  Mean   :273.5   Mean   : 68122   Mean   : 5150   Mean   :2.965  
##  3rd Qu.:409.8   3rd Qu.: 82000   3rd Qu.: 6360   3rd Qu.:3.000  
##  Max.   :546.0   Max.   :190000   Max.   :16200   Max.   :6.000  
##    bathrooms        stories        driveway          recreation       
##  Min.   :1.000   Min.   :1.000   Length:546         Length:546        
##  1st Qu.:1.000   1st Qu.:1.000   Class :character   Class :character  
##  Median :1.000   Median :2.000   Mode  :character   Mode  :character  
##  Mean   :1.286   Mean   :1.808                                        
##  3rd Qu.:2.000   3rd Qu.:2.000                                        
##  Max.   :4.000   Max.   :4.000                                        
##    fullbase           gasheat             aircon              garage      
##  Length:546         Length:546         Length:546         Min.   :0.0000  
##  Class :character   Class :character   Class :character   1st Qu.:0.0000  
##  Mode  :character   Mode  :character   Mode  :character   Median :0.0000  
##                                                           Mean   :0.6923  
##                                                           3rd Qu.:1.0000  
##                                                           Max.   :3.0000  
##     prefer         
##  Length:546        
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
##1.b
# Mean and median of price and lot size over all rows of data1.
mean(data1[["price"]])
## [1] 68121.6
median(data1[["price"]])
## [1] 62000
mean(data1[["lotsize"]])
## [1] 5150.266
median(data1[["lotsize"]])
## [1] 4600
## [1] 4600
###2
# Keep the first 20 observations (all columns) and print them.
data2 <- data1[seq_len(20L), ]
print(data2)
##     X price lotsize bedrooms bathrooms stories driveway recreation fullbase
## 1   1 42000    5850        3         1       2      yes         no      yes
## 2   2 38500    4000        2         1       1      yes         no       no
## 3   3 49500    3060        3         1       1      yes         no       no
## 4   4 60500    6650        3         1       2      yes        yes       no
## 5   5 61000    6360        2         1       1      yes         no       no
## 6   6 66000    4160        3         1       1      yes        yes      yes
## 7   7 66000    3880        3         2       2      yes         no      yes
## 8   8 69000    4160        3         1       3      yes         no       no
## 9   9 83800    4800        3         1       1      yes        yes      yes
## 10 10 88500    5500        3         2       4      yes        yes       no
## 11 11 90000    7200        3         2       1      yes         no      yes
## 12 12 30500    3000        2         1       1       no         no       no
## 13 13 27000    1700        3         1       2      yes         no       no
## 14 14 36000    2880        3         1       1       no         no       no
## 15 15 37000    3600        2         1       1      yes         no       no
## 16 16 37900    3185        2         1       1      yes         no       no
## 17 17 40500    3300        3         1       2       no         no       no
## 18 18 40750    5200        4         1       3      yes         no       no
## 19 19 45000    3450        1         1       1      yes         no       no
## 20 20 45000    3986        2         2       1       no        yes      yes
##    gasheat aircon garage prefer
## 1       no     no      1     no
## 2       no     no      0     no
## 3       no     no      0     no
## 4       no     no      0     no
## 5       no     no      0     no
## 6       no    yes      0     no
## 7       no     no      2     no
## 8       no     no      0     no
## 9       no     no      0     no
## 10      no    yes      1     no
## 11      no    yes      3     no
## 12      no     no      0     no
## 13      no     no      0     no
## 14      no     no      0     no
## 15      no     no      0     no
## 16      no    yes      0     no
## 17      no     no      1     no
## 18      no     no      0     no
## 19      no     no      0     no
## 20      no     no      1     no
# Collect price, lot size, bedrooms and stories of the subset in a new data
# frame. The arguments are unnamed, so data.frame() derives the column names
# from the expressions (data2.price, data2.lotsize, ...).
data3 <- data.frame(
  data2$price,
  data2$lotsize,
  data2$bedrooms,
  data2$stories
)
print(data3)
##    data2.price data2.lotsize data2.bedrooms data2.stories
## 1        42000          5850              3             2
## 2        38500          4000              2             1
## 3        49500          3060              3             1
## 4        60500          6650              3             2
## 5        61000          6360              2             1
## 6        66000          4160              3             1
## 7        66000          3880              3             2
## 8        69000          4160              3             3
## 9        83800          4800              3             1
## 10       88500          5500              3             4
## 11       90000          7200              3             1
## 12       30500          3000              2             1
## 13       27000          1700              3             2
## 14       36000          2880              3             1
## 15       37000          3600              2             1
## 16       37900          3185              2             1
## 17       40500          3300              3             2
## 18       40750          5200              4             3
## 19       45000          3450              1             1
## 20       45000          3986              2             1
###3

# Rename the four columns to short names: P (price), L (lot size),
# B (bedrooms), S (stories).
data3 <- setNames(data3, c("P", "L", "B", "S"))
print(data3)
##        P    L B S
## 1  42000 5850 3 2
## 2  38500 4000 2 1
## 3  49500 3060 3 1
## 4  60500 6650 3 2
## 5  61000 6360 2 1
## 6  66000 4160 3 1
## 7  66000 3880 3 2
## 8  69000 4160 3 3
## 9  83800 4800 3 1
## 10 88500 5500 3 4
## 11 90000 7200 3 1
## 12 30500 3000 2 1
## 13 27000 1700 3 2
## 14 36000 2880 3 1
## 15 37000 3600 2 1
## 16 37900 3185 2 1
## 17 40500 3300 3 2
## 18 40750 5200 4 3
## 19 45000 3450 1 1
## 20 45000 3986 2 1
###4

# Summary statistics for the subset, followed by the mean and median of
# price (P) and lot size (L) for comparison with the full data set.
summary(object = data3)
##        P               L              B              S      
##  Min.   :27000   Min.   :1700   Min.   :1.00   Min.   :1.0  
##  1st Qu.:38350   1st Qu.:3271   1st Qu.:2.00   1st Qu.:1.0  
##  Median :45000   Median :3993   Median :3.00   Median :1.0  
##  Mean   :52723   Mean   :4296   Mean   :2.65   Mean   :1.6  
##  3rd Qu.:66000   3rd Qu.:5275   3rd Qu.:3.00   3rd Qu.:2.0  
##  Max.   :90000   Max.   :7200   Max.   :4.00   Max.   :4.0
mean(data3[["P"]])
## [1] 52722.5
median(data3[["P"]])
## [1] 45000
mean(data3[["L"]])
## [1] 4296.05
median(data3[["L"]])
## [1] 3993
### The new data frame has a smaller mean and median than the full data frame because it contains only the first 20 rows, which are not a representative sample of all 546 houses. The mean and median depend on which observations are included, not merely on how many there are.

###5

# Recode the numeric story count (column S) into a labelled factor.
# cut() uses right-closed intervals by default, so breaks 0:4 produce the bins
# (0,1], (1,2], (2,3], (3,4] -- one bin per story count.
# Fix: the second label was misspelled "secondfoor"; it is now "secondfloor".
S1 <- cut(
  data3$S,
  breaks = 0:4,
  labels = c("firstfloor", "secondfloor", "thirdfloor", "fourthfloor")
)
S1
##  [1] secondfloor firstfloor  firstfloor  secondfloor firstfloor  firstfloor 
##  [7] secondfloor thirdfloor  firstfloor  fourthfloor firstfloor  firstfloor 
## [13] secondfloor firstfloor  firstfloor  firstfloor  secondfloor thirdfloor 
## [19] firstfloor  firstfloor 
## Levels: firstfloor secondfloor thirdfloor fourthfloor
# `new` is kept as an alias of S1 so code that refers to it keeps working.
new <- S1

# Attach the factor to data3 as an additional column named S1.
data3$S1 <- new

data3
##        P    L B S          S1
## 1  42000 5850 3 2 secondfloor
## 2  38500 4000 2 1  firstfloor
## 3  49500 3060 3 1  firstfloor
## 4  60500 6650 3 2 secondfloor
## 5  61000 6360 2 1  firstfloor
## 6  66000 4160 3 1  firstfloor
## 7  66000 3880 3 2 secondfloor
## 8  69000 4160 3 3  thirdfloor
## 9  83800 4800 3 1  firstfloor
## 10 88500 5500 3 4 fourthfloor
## 11 90000 7200 3 1  firstfloor
## 12 30500 3000 2 1  firstfloor
## 13 27000 1700 3 2 secondfloor
## 14 36000 2880 3 1  firstfloor
## 15 37000 3600 2 1  firstfloor
## 16 37900 3185 2 1  firstfloor
## 17 40500 3300 3 2 secondfloor
## 18 40750 5200 4 3  thirdfloor
## 19 45000 3450 1 1  firstfloor
## 20 45000 3986 2 1  firstfloor
###6
# Build data4 from price, lot size, bedrooms and the story factor, naming the
# columns directly instead of renaming them afterwards.
data4 <- data.frame(P2 = data3$P, L2 = data3$L, B2 = data3$B, S2 = data3$S1)
data4
##       P2   L2 B2          S2
## 1  42000 5850  3 secondfloor
## 2  38500 4000  2  firstfloor
## 3  49500 3060  3  firstfloor
## 4  60500 6650  3 secondfloor
## 5  61000 6360  2  firstfloor
## 6  66000 4160  3  firstfloor
## 7  66000 3880  3 secondfloor
## 8  69000 4160  3  thirdfloor
## 9  83800 4800  3  firstfloor
## 10 88500 5500  3 fourthfloor
## 11 90000 7200  3  firstfloor
## 12 30500 3000  2  firstfloor
## 13 27000 1700  3 secondfloor
## 14 36000 2880  3  firstfloor
## 15 37000 3600  2  firstfloor
## 16 37900 3185  2  firstfloor
## 17 40500 3300  3 secondfloor
## 18 40750 5200  4  thirdfloor
## 19 45000 3450  1  firstfloor
## 20 45000 3986  2  firstfloor
###7 bonus question 
# Bonus: read the same CSV directly from its raw GitHub URL and preview it.
data5 <- read.csv(
  file = "https://raw.githubusercontent.com/Cnadour/HW2/main/HousePrices.csv"
)
head(data5, n = 6L)
##   X price lotsize bedrooms bathrooms stories driveway recreation fullbase
## 1 1 42000    5850        3         1       2      yes         no      yes
## 2 2 38500    4000        2         1       1      yes         no       no
## 3 3 49500    3060        3         1       1      yes         no       no
## 4 4 60500    6650        3         1       2      yes        yes       no
## 5 5 61000    6360        2         1       1      yes         no       no
## 6 6 66000    4160        3         1       1      yes        yes      yes
##   gasheat aircon garage prefer
## 1      no     no      1     no
## 2      no     no      0     no
## 3      no     no      0     no
## 4      no     no      0     no
## 5      no     no      0     no
## 6      no    yes      0     no