###1.

# Load the house-price table from the local CSV (first row is the header,
# fields are comma-separated), keep a working copy in `data1`, and preview
# the first six rows.
HP <- read.csv(
  file = "HousePrices.csv",
  header = TRUE,
  sep = ","
)
data1 <- data.frame(HP)
head(data1, n = 6L)
##   X price lotsize bedrooms bathrooms stories driveway recreation fullbase
## 1 1 42000    5850        3         1       2      yes         no      yes
## 2 2 38500    4000        2         1       1      yes         no       no
## 3 3 49500    3060        3         1       1      yes         no       no
## 4 4 60500    6650        3         1       2      yes        yes       no
## 5 5 61000    6360        2         1       1      yes         no       no
## 6 6 66000    4160        3         1       1      yes        yes      yes
##   gasheat aircon garage prefer
## 1      no     no      1     no
## 2      no     no      0     no
## 3      no     no      0     no
## 4      no     no      0     no
## 5      no     no      0     no
## 6      no    yes      0     no
##1.a

# Per-column overview of the full data set: quartiles and mean for numeric
# columns, length/class/mode for character columns.
summary(object = data1)
##        X             price           lotsize         bedrooms    
##  Min.   :  1.0   Min.   : 25000   Min.   : 1650   Min.   :1.000  
##  1st Qu.:137.2   1st Qu.: 49125   1st Qu.: 3600   1st Qu.:2.000  
##  Median :273.5   Median : 62000   Median : 4600   Median :3.000  
##  Mean   :273.5   Mean   : 68122   Mean   : 5150   Mean   :2.965  
##  3rd Qu.:409.8   3rd Qu.: 82000   3rd Qu.: 6360   3rd Qu.:3.000  
##  Max.   :546.0   Max.   :190000   Max.   :16200   Max.   :6.000  
##    bathrooms        stories        driveway          recreation       
##  Min.   :1.000   Min.   :1.000   Length:546         Length:546        
##  1st Qu.:1.000   1st Qu.:1.000   Class :character   Class :character  
##  Median :1.000   Median :2.000   Mode  :character   Mode  :character  
##  Mean   :1.286   Mean   :1.808                                        
##  3rd Qu.:2.000   3rd Qu.:2.000                                        
##  Max.   :4.000   Max.   :4.000                                        
##    fullbase           gasheat             aircon              garage      
##  Length:546         Length:546         Length:546         Min.   :0.0000  
##  Class :character   Class :character   Class :character   1st Qu.:0.0000  
##  Mode  :character   Mode  :character   Mode  :character   Median :0.0000  
##                                                           Mean   :0.6923  
##                                                           3rd Qu.:1.0000  
##                                                           Max.   :3.0000  
##     prefer         
##  Length:546        
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
##1.b
# Mean and median of the price and lotsize columns over the full data set.
mean(data1[["price"]])
## [1] 68121.6
median(data1[["price"]])
## [1] 62000
mean(data1[["lotsize"]])
## [1] 5150.266
median(data1[["lotsize"]])
## [1] 4600
###2
# Keep the first 20 rows, then pull price, lotsize, bedrooms and stories into
# a new data frame. The column names are spelled out explicitly; they are the
# same `data2.<column>` names data.frame() derives from unnamed `data2$...`
# arguments.
data2 <- data1[seq_len(20), ]
data3 <- data.frame(
  data2.price = data2$price,
  data2.lotsize = data2$lotsize,
  data2.bedrooms = data2$bedrooms,
  data2.stories = data2$stories
)
head(data3)
##   data2.price data2.lotsize data2.bedrooms data2.stories
## 1       42000          5850              3             2
## 2       38500          4000              2             1
## 3       49500          3060              3             1
## 4       60500          6650              3             2
## 5       61000          6360              2             1
## 6       66000          4160              3             1
###3

# Shorten the column names: P = price, L = lotsize, B = bedrooms, S = stories.
colnames(data3) <- c("P", "L", "B", "S")
head(data3)
##       P    L B S
## 1 42000 5850 3 2
## 2 38500 4000 2 1
## 3 49500 3060 3 1
## 4 60500 6650 3 2
## 5 61000 6360 2 1
## 6 66000 4160 3 1
###4

# Per-column overview (quartiles and mean) of the 20-row subset.
summary(object = data3)
##        P               L              B              S      
##  Min.   :27000   Min.   :1700   Min.   :1.00   Min.   :1.0  
##  1st Qu.:38350   1st Qu.:3271   1st Qu.:2.00   1st Qu.:1.0  
##  Median :45000   Median :3993   Median :3.00   Median :1.0  
##  Mean   :52723   Mean   :4296   Mean   :2.65   Mean   :1.6  
##  3rd Qu.:66000   3rd Qu.:5275   3rd Qu.:3.00   3rd Qu.:2.0  
##  Max.   :90000   Max.   :7200   Max.   :4.00   Max.   :4.0
# Mean and median of P and L in the 20-row subset, for comparison with the
# full-data values computed in section 1.b.
mean(data3[["P"]])
## [1] 52722.5
median(data3[["P"]])
## [1] 45000
mean(data3[["L"]])
## [1] 4296.05
median(data3[["L"]])
## [1] 3993
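### The new data frame has a smaller mean and median (for both price and lot size) than the full data frame. It contains only the first 20 of the 546 rows, and the mean and median depend on which observations are included, so the statistics of such a small subset can differ noticeably from those of the full data set.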

###5

# Section 5: convert the numeric story count S into a labelled factor S1.
# cut() uses right-closed intervals by default, i.e. (0,1], (1,2], (2,3],
# (3,4], so the values 1, 2, 3 and 4 map to the four labels in order.
# The second label is spelled "secondfloor", consistent with the other three.
S1 <- cut(
  data3$S,
  breaks = c(0, 1, 2, 3, 4),
  labels = c("firstfloor", "secondfloor", "thirdfloor", "fourthfloor")
)
S1
##  [1] secondfloor firstfloor  firstfloor  secondfloor firstfloor  firstfloor 
##  [7] secondfloor thirdfloor  firstfloor  fourthfloor firstfloor  firstfloor 
## [13] secondfloor firstfloor  firstfloor  firstfloor  secondfloor thirdfloor 
## [19] firstfloor  firstfloor 
## Levels: firstfloor secondfloor thirdfloor fourthfloor
# `new` is kept as a separate object in case later code refers to it.
new <- S1

# Append the factor to data3 as an extra column next to the numeric S.
data3$S1 <- new

head(data3)
##       P    L B S          S1
## 1 42000 5850 3 2 secondfloor
## 2 38500 4000 2 1  firstfloor
## 3 49500 3060 3 1  firstfloor
## 4 60500 6650 3 2 secondfloor
## 5 61000 6360 2 1  firstfloor
## 6 66000 4160 3 1  firstfloor
###6
# Section 6: build data4 from P, L, B and the factor S1 (the numeric S is
# left out), naming the columns P2, L2, B2, S2 directly at construction.
data4 <- data.frame(
  P2 = data3$P,
  L2 = data3$L,
  B2 = data3$B,
  S2 = data3$S1
)
head(data4)
##      P2   L2 B2          S2
## 1 42000 5850  3 secondfloor
## 2 38500 4000  2  firstfloor
## 3 49500 3060  3  firstfloor
## 4 60500 6650  3 secondfloor
## 5 61000 6360  2  firstfloor
## 6 66000 4160  3  firstfloor
###7 bonus question 
# Bonus: read the same CSV straight from its raw GitHub URL and preview the
# first six rows.
data5 <- read.csv(
  file = "https://raw.githubusercontent.com/Cnadour/HW2/main/HousePrices.csv"
)
head(data5, n = 6L)
##   X price lotsize bedrooms bathrooms stories driveway recreation fullbase
## 1 1 42000    5850        3         1       2      yes         no      yes
## 2 2 38500    4000        2         1       1      yes         no       no
## 3 3 49500    3060        3         1       1      yes         no       no
## 4 4 60500    6650        3         1       2      yes        yes       no
## 5 5 61000    6360        2         1       1      yes         no       no
## 6 6 66000    4160        3         1       1      yes        yes      yes
##   gasheat aircon garage prefer
## 1      no     no      1     no
## 2      no     no      0     no
## 3      no     no      0     no
## 4      no     no      0     no
## 5      no     no      0     no
## 6      no    yes      0     no