# exam2) 캘리포니아 주택데이터에서 결측값을 제거한후에 처음부터 순서대로
# 70% 추출하여 'housing_median_age' 변수에 1사분위 값을 구하시오 
library(dplyr)
## Warning: 패키지 'dplyr'는 R 버전 4.1.3에서 작성되었습니다
## 
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
housing<-read.csv("housing1.csv")
summary(housing)
##    longitude         latitude     housing_median_age  total_rooms   
##  Min.   :-124.3   Min.   :32.54   Min.   : 1.00      Min.   :    2  
##  1st Qu.:-121.8   1st Qu.:33.93   1st Qu.:18.00      1st Qu.: 1448  
##  Median :-118.5   Median :34.26   Median :29.00      Median : 2127  
##  Mean   :-119.6   Mean   :35.63   Mean   :28.64      Mean   : 2636  
##  3rd Qu.:-118.0   3rd Qu.:37.71   3rd Qu.:37.00      3rd Qu.: 3148  
##  Max.   :-114.3   Max.   :41.95   Max.   :52.00      Max.   :39320  
##                                                                     
##  total_bedrooms     population      households     median_income    
##  Min.   :   1.0   Min.   :    3   Min.   :   1.0   Min.   : 0.4999  
##  1st Qu.: 296.0   1st Qu.:  787   1st Qu.: 280.0   1st Qu.: 2.5634  
##  Median : 435.0   Median : 1166   Median : 409.0   Median : 3.5348  
##  Mean   : 537.9   Mean   : 1425   Mean   : 499.5   Mean   : 3.8707  
##  3rd Qu.: 647.0   3rd Qu.: 1725   3rd Qu.: 605.0   3rd Qu.: 4.7432  
##  Max.   :6445.0   Max.   :35682   Max.   :6082.0   Max.   :15.0001  
##  NA's   :207                                                        
##  median_house_value ocean_proximity   
##  Min.   : 14999     Length:20640      
##  1st Qu.:119600     Class :character  
##  Median :179700     Mode  :character  
##  Mean   :206856                       
##  3rd Qu.:264725                       
##  Max.   :500001                       
## 
colSums(is.na(housing))
##          longitude           latitude housing_median_age        total_rooms 
##                  0                  0                  0                  0 
##     total_bedrooms         population         households      median_income 
##                207                  0                  0                  0 
## median_house_value    ocean_proximity 
##                  0                  0
# ① 결측값 제거
house<-housing %>% filter(!is.na(total_bedrooms))
colSums(is.na(house))
##          longitude           latitude housing_median_age        total_rooms 
##                  0                  0                  0                  0 
##     total_bedrooms         population         households      median_income 
##                  0                  0                  0                  0 
## median_house_value    ocean_proximity 
##                  0                  0
# ② 순서대로 70% 추출
rownum<-nrow(housing)*0.7
rownum
## [1] 14448
df<-16777.5
df1<-3.3333
round(df)
## [1] 16778
round(df1,2)
## [1] 3.33
house1<-house[1:rownum,]
# ③ housing_median_age 1사분위수 값 구하기
quantile(house1$housing_median_age)
##   0%  25%  50%  75% 100% 
##    1   19   30   37   52
df<-quantile(house1$housing_median_age)[[2]]
print(df)
## [1] 19