# part3 예제 1번

rm(list=ls())
ls()
## character(0)
library(dplyr)
## Warning: 패키지 'dplyr'는 R 버전 4.2.2에서 작성되었습니다
## 
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
house<-read.csv("housing.csv")
View(house)
glimpse(house)
## Rows: 20,640
## Columns: 10
## $ longitude          <dbl> -122.23, -122.22, -122.24, -122.25, -122.25, -122.2…
## $ latitude           <dbl> 37.88, 37.86, 37.85, 37.85, 37.85, 37.85, 37.84, 37…
## $ housing_median_age <int> 41, 21, 52, 52, 52, 52, 52, 52, 42, 52, 52, 52, 52,…
## $ total_rooms        <int> 880, 7099, 1467, 1274, 1627, 919, 2535, 3104, 2555,…
## $ total_bedrooms     <int> 129, 1106, 190, 235, 280, 213, 489, 687, 665, 707, …
## $ population         <int> 322, 2401, 496, 558, 565, 413, 1094, 1157, 1206, 15…
## $ households         <int> 126, 1138, 177, 219, 259, 193, 514, 647, 595, 714, …
## $ median_income      <dbl> 8.3252, 8.3014, 7.2574, 5.6431, 3.8462, 4.0368, 3.6…
## $ median_house_value <int> 452600, 358500, 352100, 341300, 342200, 269700, 299…
## $ ocean_proximity    <chr> "NEAR BAY", "NEAR BAY", "NEAR BAY", "NEAR BAY", "NE…
nrow(house)
## [1] 20640
rownum<-nrow(house)*0.8
house1<-house[1:rownum,]
glimpse(house1)
## Rows: 16,512
## Columns: 10
## $ longitude          <dbl> -122.23, -122.22, -122.24, -122.25, -122.25, -122.2…
## $ latitude           <dbl> 37.88, 37.86, 37.85, 37.85, 37.85, 37.85, 37.84, 37…
## $ housing_median_age <int> 41, 21, 52, 52, 52, 52, 52, 52, 42, 52, 52, 52, 52,…
## $ total_rooms        <int> 880, 7099, 1467, 1274, 1627, 919, 2535, 3104, 2555,…
## $ total_bedrooms     <int> 129, 1106, 190, 235, 280, 213, 489, 687, 665, 707, …
## $ population         <int> 322, 2401, 496, 558, 565, 413, 1094, 1157, 1206, 15…
## $ households         <int> 126, 1138, 177, 219, 259, 193, 514, 647, 595, 714, …
## $ median_income      <dbl> 8.3252, 8.3014, 7.2574, 5.6431, 3.8462, 4.0368, 3.6…
## $ median_house_value <int> 452600, 358500, 352100, 341300, 342200, 269700, 299…
## $ ocean_proximity    <chr> "NEAR BAY", "NEAR BAY", "NEAR BAY", "NEAR BAY", "NE…
house1 %>% glimpse
## Rows: 16,512
## Columns: 10
## $ longitude          <dbl> -122.23, -122.22, -122.24, -122.25, -122.25, -122.2…
## $ latitude           <dbl> 37.88, 37.86, 37.85, 37.85, 37.85, 37.85, 37.84, 37…
## $ housing_median_age <int> 41, 21, 52, 52, 52, 52, 52, 52, 42, 52, 52, 52, 52,…
## $ total_rooms        <int> 880, 7099, 1467, 1274, 1627, 919, 2535, 3104, 2555,…
## $ total_bedrooms     <int> 129, 1106, 190, 235, 280, 213, 489, 687, 665, 707, …
## $ population         <int> 322, 2401, 496, 558, 565, 413, 1094, 1157, 1206, 15…
## $ households         <int> 126, 1138, 177, 219, 259, 193, 514, 647, 595, 714, …
## $ median_income      <dbl> 8.3252, 8.3014, 7.2574, 5.6431, 3.8462, 4.0368, 3.6…
## $ median_house_value <int> 452600, 358500, 352100, 341300, 342200, 269700, 299…
## $ ocean_proximity    <chr> "NEAR BAY", "NEAR BAY", "NEAR BAY", "NEAR BAY", "NE…
table(is.na(house1))
## 
##  FALSE   TRUE 
## 164961    159
df1<-sd(house1$total_bedroom, na.rm=TRUE)
df1
## [1] 435.9006
df2<-median(house1$total_bedrooms,na.rm=TRUE)
df2
## [1] 436
house1$total_bedrooms<-ifelse(is.na(house1$total_bedrooms), df2, house1$total_bedrooms)
colSums(is.na(house1))
##          longitude           latitude housing_median_age        total_rooms 
##                  0                  0                  0                  0 
##     total_bedrooms         population         households      median_income 
##                  0                  0                  0                  0 
## median_house_value    ocean_proximity 
##                  0                  0
df3<-sd(house1$total_bedroom)
df3
## [1] 433.9254
df4<-df1-df3
df4
## [1] 1.975147
#예제 2번
rm(list=ls())
ls()
## character(0)
house<-read.csv("housing.csv")
nrow(house)
## [1] 20640
colSums(is.na(house))
##          longitude           latitude housing_median_age        total_rooms 
##                  0                  0                  0                  0 
##     total_bedrooms         population         households      median_income 
##                207                  0                  0                  0 
## median_house_value    ocean_proximity 
##                  0                  0
house<-house %>% filter(!is.na(total_bedrooms))
colSums(is.na(house))
##          longitude           latitude housing_median_age        total_rooms 
##                  0                  0                  0                  0 
##     total_bedrooms         population         households      median_income 
##                  0                  0                  0                  0 
## median_house_value    ocean_proximity 
##                  0                  0
rownum<-nrow(house)*0.7
rownum
## [1] 14303.1
house1<-house[1:rownum,]
quantile(house1$housing_median_age)
##   0%  25%  50%  75% 100% 
##    1   19   30   38   52
df<-quantile(house1$housing_median_age)[[2]]
print(df)
## [1] 19