library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##A. Data #A.1. Data Titanic
# Flatten the built-in Titanic contingency table into a data frame with one
# row per Class/Sex/Age/Survived combination and its count (Freq), then
# print the whole table.
a1 <- as.data.frame(datasets::Titanic)
a1
## Class Sex Age Survived Freq
## 1 1st Male Child No 0
## 2 2nd Male Child No 0
## 3 3rd Male Child No 35
## 4 Crew Male Child No 0
## 5 1st Female Child No 0
## 6 2nd Female Child No 0
## 7 3rd Female Child No 17
## 8 Crew Female Child No 0
## 9 1st Male Adult No 118
## 10 2nd Male Adult No 154
## 11 3rd Male Adult No 387
## 12 Crew Male Adult No 670
## 13 1st Female Adult No 4
## 14 2nd Female Adult No 13
## 15 3rd Female Adult No 89
## 16 Crew Female Adult No 3
## 17 1st Male Child Yes 5
## 18 2nd Male Child Yes 11
## 19 3rd Male Child Yes 13
## 20 Crew Male Child Yes 0
## 21 1st Female Child Yes 1
## 22 2nd Female Child Yes 13
## 23 3rd Female Child Yes 14
## 24 Crew Female Child Yes 0
## 25 1st Male Adult Yes 57
## 26 2nd Male Adult Yes 14
## 27 3rd Male Adult Yes 75
## 28 Crew Male Adult Yes 192
## 29 1st Female Adult Yes 140
## 30 2nd Female Adult Yes 80
## 31 3rd Female Adult Yes 76
## 32 Crew Female Adult Yes 20
#A.2. Data House Price
# Load the house-price data from the CSV file in the working directory and
# preview its first six rows.
a2 <- read.csv(file = "house_price.csv")
head(a2, n = 6L)
## date price bedrooms bathrooms sqft_living sqft_lot floors
## 1 2/5/2014 0:00 313000 3 1.50 1340 7912 1.5
## 2 2/5/2014 0:00 2384000 5 2.50 3650 9050 2.0
## 3 2/5/2014 0:00 342000 3 2.00 1930 11947 1.0
## 4 2/5/2014 0:00 420000 3 2.25 2000 8030 1.0
## 5 2/5/2014 0:00 550000 4 2.50 1940 10500 1.0
## 6 2/5/2014 0:00 490000 2 1.00 880 6380 1.0
## waterfront view condition sqft_above sqft_basement yr_built yr_renovated
## 1 0 0 3 1340 0 1955 2005
## 2 0 4 5 3370 280 1921 0
## 3 0 0 4 1930 0 1966 0
## 4 0 0 4 1000 1000 1963 0
## 5 0 0 4 1140 800 1976 1992
## 6 0 0 3 880 0 1938 1994
## street city statezip country
## 1 18810 Densmore Ave N Shoreline WA 98133 USA
## 2 709 W Blaine St Seattle WA 98119 USA
## 3 26206-26214 143rd Ave SE Kent WA 98042 USA
## 4 857 170th Pl NE Bellevue WA 98008 USA
## 5 9105 170th Ave NE Redmond WA 98052 USA
## 6 522 NE 88th St Seattle WA 98115 USA
##B. Struktur Data #B.1. Struktur Data Titanic
# Compact overview of the Titanic data frame: column types and leading values.
str(object = a1)
## 'data.frame': 32 obs. of 5 variables:
## $ Class : Factor w/ 4 levels "1st","2nd","3rd",..: 1 2 3 4 1 2 3 4 1 2 ...
## $ Sex : Factor w/ 2 levels "Male","Female": 1 1 1 1 2 2 2 2 1 1 ...
## $ Age : Factor w/ 2 levels "Child","Adult": 1 1 1 1 1 1 1 1 2 2 ...
## $ Survived: Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ Freq : num 0 0 35 0 0 0 17 0 118 154 ...
#B.2. Struktur Data House Price
# Compact overview of the house-price data frame: column types and leading
# values.
str(object = a2)
## 'data.frame': 4600 obs. of 18 variables:
## $ date : chr "2/5/2014 0:00" "2/5/2014 0:00" "2/5/2014 0:00" "2/5/2014 0:00" ...
## $ price : num 313000 2384000 342000 420000 550000 ...
## $ bedrooms : int 3 5 3 3 4 2 2 4 3 4 ...
## $ bathrooms : num 1.5 2.5 2 2.25 2.5 1 2 2.5 2.5 2 ...
## $ sqft_living : int 1340 3650 1930 2000 1940 880 1350 2710 2430 1520 ...
## $ sqft_lot : int 7912 9050 11947 8030 10500 6380 2560 35868 88426 6200 ...
## $ floors : num 1.5 2 1 1 1 1 1 2 1 1.5 ...
## $ waterfront : int 0 0 0 0 0 0 0 0 0 0 ...
## $ view : int 0 4 0 0 0 0 0 0 0 0 ...
## $ condition : int 3 5 4 4 4 3 3 3 4 3 ...
## $ sqft_above : int 1340 3370 1930 1000 1140 880 1350 2710 1570 1520 ...
## $ sqft_basement: int 0 280 0 1000 800 0 0 0 860 0 ...
## $ yr_built : int 1955 1921 1966 1963 1976 1938 1976 1989 1985 1945 ...
## $ yr_renovated : int 2005 0 0 0 1992 1994 0 0 0 2010 ...
## $ street : chr "18810 Densmore Ave N" "709 W Blaine St" "26206-26214 143rd Ave SE" "857 170th Pl NE" ...
## $ city : chr "Shoreline" "Seattle" "Kent" "Bellevue" ...
## $ statezip : chr "WA 98133" "WA 98119" "WA 98042" "WA 98008" ...
## $ country : chr "USA" "USA" "USA" "USA" ...
##C. Ringkasan Statistik #C.1. Ringkasan Statistik Titanic
# Per-column summary of the Titanic data frame: level counts for the factor
# columns and quartiles/mean for Freq.
summary(object = a1)
## Class Sex Age Survived Freq
## 1st :8 Male :16 Child:16 No :16 Min. : 0.00
## 2nd :8 Female:16 Adult:16 Yes:16 1st Qu.: 0.75
## 3rd :8 Median : 13.50
## Crew:8 Mean : 68.78
## 3rd Qu.: 77.00
## Max. :670.00
#C.2. Ringkasan Statistik House Price
# Per-column summary of the house-price data frame: quartiles/mean for the
# numeric columns, length/class/mode for the character columns.
summary(object = a2)
## date price bedrooms bathrooms
## Length:4600 Min. : 0 Min. :0.000 Min. :0.000
## Class :character 1st Qu.: 322875 1st Qu.:3.000 1st Qu.:1.750
## Mode :character Median : 460944 Median :3.000 Median :2.250
## Mean : 551963 Mean :3.401 Mean :2.161
## 3rd Qu.: 654963 3rd Qu.:4.000 3rd Qu.:2.500
## Max. :26590000 Max. :9.000 Max. :8.000
## sqft_living sqft_lot floors waterfront
## Min. : 370 Min. : 638 Min. :1.000 Min. :0.000000
## 1st Qu.: 1460 1st Qu.: 5001 1st Qu.:1.000 1st Qu.:0.000000
## Median : 1980 Median : 7683 Median :1.500 Median :0.000000
## Mean : 2139 Mean : 14853 Mean :1.512 Mean :0.007174
## 3rd Qu.: 2620 3rd Qu.: 11001 3rd Qu.:2.000 3rd Qu.:0.000000
## Max. :13540 Max. :1074218 Max. :3.500 Max. :1.000000
## view condition sqft_above sqft_basement
## Min. :0.0000 Min. :1.000 Min. : 370 Min. : 0.0
## 1st Qu.:0.0000 1st Qu.:3.000 1st Qu.:1190 1st Qu.: 0.0
## Median :0.0000 Median :3.000 Median :1590 Median : 0.0
## Mean :0.2407 Mean :3.452 Mean :1827 Mean : 312.1
## 3rd Qu.:0.0000 3rd Qu.:4.000 3rd Qu.:2300 3rd Qu.: 610.0
## Max. :4.0000 Max. :5.000 Max. :9410 Max. :4820.0
## yr_built yr_renovated street city
## Min. :1900 Min. : 0.0 Length:4600 Length:4600
## 1st Qu.:1951 1st Qu.: 0.0 Class :character Class :character
## Median :1976 Median : 0.0 Mode :character Mode :character
## Mean :1971 Mean : 808.6
## 3rd Qu.:1997 3rd Qu.:1999.0
## Max. :2014 Max. :2014.0
## statezip country
## Length:4600 Length:4600
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##D. Missing Value #D.1. Missing Value Titanic
# Count the missing values in each column of the Titanic data frame.
colSums(x = is.na(a1))
## Class Sex Age Survived Freq
## 0 0 0 0 0
#D.2. Missing Value House Price
# Count the missing values in each column of the house-price data frame.
colSums(x = is.na(a2))
## date price bedrooms bathrooms sqft_living
## 0 0 0 0 0
## sqft_lot floors waterfront view condition
## 0 0 0 0 0
## sqft_above sqft_basement yr_built yr_renovated street
## 0 0 0 0 0
## city statezip country
## 0 0 0
##E. Memilih Kolom Tertentu #E.1. Memilih Kolom Tertentu di Titanic
# Keep only the Class and Age columns of the Titanic data and preview the
# first six rows.
pilihdata1 <- a1 %>%
  select(Class, Age)
head(pilihdata1, n = 6L)
## Class Age
## 1 1st Child
## 2 2nd Child
## 3 3rd Child
## 4 Crew Child
## 5 1st Child
## 6 2nd Child
#E.2. Memilih Kolom Tertentu di House Price
# Keep only the date, floors and country columns of the house-price data and
# preview the first six rows.
pilihdata2 <- a2 %>%
  select(date, floors, country)
head(pilihdata2, n = 6L)
## date floors country
## 1 2/5/2014 0:00 1.5 USA
## 2 2/5/2014 0:00 2.0 USA
## 3 2/5/2014 0:00 1.0 USA
## 4 2/5/2014 0:00 1.0 USA
## 5 2/5/2014 0:00 1.0 USA
## 6 2/5/2014 0:00 1.0 USA
##F. Filter dan Sortir Data #F.1. Filter Titanic
# Keep the Titanic groups with more than 50 people.
filter1 <- a1 %>%
  filter(Freq > 50)
# Display those groups from the largest to the smallest count; filter1 itself
# stays in its original row order.
filter1[
  order(-filter1$Freq),
  c("Class", "Sex", "Age", "Survived", "Freq")
]
## Class Sex Age Survived Freq
## 4 Crew Male Adult No 670
## 3 3rd Male Adult No 387
## 8 Crew Male Adult Yes 192
## 2 2nd Male Adult No 154
## 9 1st Female Adult Yes 140
## 1 1st Male Adult No 118
## 5 3rd Female Adult No 89
## 10 2nd Female Adult Yes 80
## 11 3rd Female Adult Yes 76
## 7 3rd Male Adult Yes 75
## 6 1st Male Adult Yes 57
# Preview the first six rows of the filtered (unsorted) Titanic groups.
head(filter1, n = 6L)
## Class Sex Age Survived Freq
## 1 1st Male Adult No 118
## 2 2nd Male Adult No 154
## 3 3rd Male Adult No 387
## 4 Crew Male Adult No 670
## 5 3rd Female Adult No 89
## 6 1st Male Adult Yes 57
#F.2. Filter House Price
# Houses with at least three bedrooms ...
filter2 <- a2 %>%
  filter(bedrooms >= 3)
# ... narrowed further to those with at least 2000 sqft of living area.
filter21 <- filter2 %>%
  filter(sqft_living >= 2000)
# Order by living area, largest first, keeping only the zip code, bedroom
# count and living area columns; then preview the top six.
filter212 <- filter21[
  order(-filter21$sqft_living),
  c("statezip", "bedrooms", "sqft_living")
]
head(filter212, n = 6L)
## statezip bedrooms sqft_living
## 61 WA 98053 7 13540
## 1068 WA 98004 5 10040
## 1243 WA 98040 5 9640
## 1088 WA 98177 5 8670
## 2198 WA 98006 5 8020
## 1979 WA 98058 5 7320
# filter21 (houses with >= 3 bedrooms and >= 2000 sqft of living area) has
# already been computed earlier in this section, so it is reused here instead
# of re-running the same two filter() calls on a2.
# Sort those houses by living area in ascending order, keeping every column,
# and preview the first six rows.
filter22 <- arrange(filter21, sqft_living)
head(filter22)
## date price bedrooms bathrooms sqft_living sqft_lot floors
## 1 2/5/2014 0:00 420000 3 2.25 2000 8030 1.0
## 2 6/5/2014 0:00 561000 3 2.00 2000 7000 2.0
## 3 6/5/2014 0:00 284000 4 2.50 2000 5390 2.0
## 4 6/5/2014 0:00 513000 4 2.50 2000 5684 2.0
## 5 8/5/2014 0:00 228000 4 1.75 2000 6120 1.0
## 6 9/5/2014 0:00 536500 4 1.75 2000 4000 1.5
## waterfront view condition sqft_above sqft_basement yr_built yr_renovated
## 1 0 0 4 1000 1000 1963 0
## 2 0 0 3 2000 0 1916 1986
## 3 0 0 3 2000 0 2003 0
## 4 0 0 3 2000 0 1996 0
## 5 0 0 3 1100 900 1965 1993
## 6 0 0 5 1450 550 1926 0
## street city statezip country
## 1 857 170th Pl NE Bellevue WA 98008 USA
## 2 6422 Marshall Ave SW Seattle WA 98136 USA
## 3 25434 160th Ave SE Kent WA 98042 USA
## 4 9041 NE 160th Pl Kenmore WA 98028 USA
## 5 5933 S Eastwood Dr Seattle WA 98178 USA
## 6 4127 Fauntleroy Way SW Seattle WA 98126 USA
##G. Rename
# Copy of the Titanic data with the Age column renamed to Usia; preview the
# first six rows.
rename1 <- a1 %>%
  rename(Usia = Age)
head(rename1, n = 6L)
## Class Sex Usia Survived Freq
## 1 1st Male Child No 0
## 2 2nd Male Child No 0
## 3 3rd Male Child No 35
## 4 Crew Male Child No 0
## 5 1st Female Child No 0
## 6 2nd Female Child No 0
##H. Mutate
# Add a text label column `kondisi` to the house-price data: "Bagus" when the
# condition score is above 3, otherwise "Kurang". Preview the first six rows.
mutate2 <- a2 %>%
  mutate(
    kondisi = ifelse(condition > 3, "Bagus", "Kurang")
  )
head(mutate2, n = 6L)
## date price bedrooms bathrooms sqft_living sqft_lot floors
## 1 2/5/2014 0:00 313000 3 1.50 1340 7912 1.5
## 2 2/5/2014 0:00 2384000 5 2.50 3650 9050 2.0
## 3 2/5/2014 0:00 342000 3 2.00 1930 11947 1.0
## 4 2/5/2014 0:00 420000 3 2.25 2000 8030 1.0
## 5 2/5/2014 0:00 550000 4 2.50 1940 10500 1.0
## 6 2/5/2014 0:00 490000 2 1.00 880 6380 1.0
## waterfront view condition sqft_above sqft_basement yr_built yr_renovated
## 1 0 0 3 1340 0 1955 2005
## 2 0 4 5 3370 280 1921 0
## 3 0 0 4 1930 0 1966 0
## 4 0 0 4 1000 1000 1963 0
## 5 0 0 4 1140 800 1976 1992
## 6 0 0 3 880 0 1938 1994
## street city statezip country kondisi
## 1 18810 Densmore Ave N Shoreline WA 98133 USA Kurang
## 2 709 W Blaine St Seattle WA 98119 USA Bagus
## 3 26206-26214 143rd Ave SE Kent WA 98042 USA Bagus
## 4 857 170th Pl NE Bellevue WA 98008 USA Bagus
## 5 9105 170th Ave NE Redmond WA 98052 USA Bagus
## 6 522 NE 88th St Seattle WA 98115 USA Kurang