R Markdown

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

##A. Data #A.1. Data Titanic

# Flatten the built-in Titanic contingency table into a data frame with one
# row per Class/Sex/Age/Survived combination and its count in `Freq`.
a1 <- as.data.frame(datasets::Titanic)
print(a1)
##    Class    Sex   Age Survived Freq
## 1    1st   Male Child       No    0
## 2    2nd   Male Child       No    0
## 3    3rd   Male Child       No   35
## 4   Crew   Male Child       No    0
## 5    1st Female Child       No    0
## 6    2nd Female Child       No    0
## 7    3rd Female Child       No   17
## 8   Crew Female Child       No    0
## 9    1st   Male Adult       No  118
## 10   2nd   Male Adult       No  154
## 11   3rd   Male Adult       No  387
## 12  Crew   Male Adult       No  670
## 13   1st Female Adult       No    4
## 14   2nd Female Adult       No   13
## 15   3rd Female Adult       No   89
## 16  Crew Female Adult       No    3
## 17   1st   Male Child      Yes    5
## 18   2nd   Male Child      Yes   11
## 19   3rd   Male Child      Yes   13
## 20  Crew   Male Child      Yes    0
## 21   1st Female Child      Yes    1
## 22   2nd Female Child      Yes   13
## 23   3rd Female Child      Yes   14
## 24  Crew Female Child      Yes    0
## 25   1st   Male Adult      Yes   57
## 26   2nd   Male Adult      Yes   14
## 27   3rd   Male Adult      Yes   75
## 28  Crew   Male Adult      Yes  192
## 29   1st Female Adult      Yes  140
## 30   2nd Female Adult      Yes   80
## 31   3rd Female Adult      Yes   76
## 32  Crew Female Adult      Yes   20

#A.2. Data House Price

# Read the house-price table from the working directory and preview the
# first six rows.
a2 <- read.csv(file = "house_price.csv")
head(a2, n = 6L)
##            date   price bedrooms bathrooms sqft_living sqft_lot floors
## 1 2/5/2014 0:00  313000        3      1.50        1340     7912    1.5
## 2 2/5/2014 0:00 2384000        5      2.50        3650     9050    2.0
## 3 2/5/2014 0:00  342000        3      2.00        1930    11947    1.0
## 4 2/5/2014 0:00  420000        3      2.25        2000     8030    1.0
## 5 2/5/2014 0:00  550000        4      2.50        1940    10500    1.0
## 6 2/5/2014 0:00  490000        2      1.00         880     6380    1.0
##   waterfront view condition sqft_above sqft_basement yr_built yr_renovated
## 1          0    0         3       1340             0     1955         2005
## 2          0    4         5       3370           280     1921            0
## 3          0    0         4       1930             0     1966            0
## 4          0    0         4       1000          1000     1963            0
## 5          0    0         4       1140           800     1976         1992
## 6          0    0         3        880             0     1938         1994
##                     street      city statezip country
## 1     18810 Densmore Ave N Shoreline WA 98133     USA
## 2          709 W Blaine St   Seattle WA 98119     USA
## 3 26206-26214 143rd Ave SE      Kent WA 98042     USA
## 4          857 170th Pl NE  Bellevue WA 98008     USA
## 5        9105 170th Ave NE   Redmond WA 98052     USA
## 6           522 NE 88th St   Seattle WA 98115     USA

##B. Struktur Data #B.1. Struktur Data Titanic

# Show the column types and a few leading values of the Titanic table.
a1 %>% str()
## 'data.frame':    32 obs. of  5 variables:
##  $ Class   : Factor w/ 4 levels "1st","2nd","3rd",..: 1 2 3 4 1 2 3 4 1 2 ...
##  $ Sex     : Factor w/ 2 levels "Male","Female": 1 1 1 1 2 2 2 2 1 1 ...
##  $ Age     : Factor w/ 2 levels "Child","Adult": 1 1 1 1 1 1 1 1 2 2 ...
##  $ Survived: Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Freq    : num  0 0 35 0 0 0 17 0 118 154 ...

#B.2. Struktur Data House Price

# Show the column types and a few leading values of the house-price table.
a2 %>% str()
## 'data.frame':    4600 obs. of  18 variables:
##  $ date         : chr  "2/5/2014 0:00" "2/5/2014 0:00" "2/5/2014 0:00" "2/5/2014 0:00" ...
##  $ price        : num  313000 2384000 342000 420000 550000 ...
##  $ bedrooms     : int  3 5 3 3 4 2 2 4 3 4 ...
##  $ bathrooms    : num  1.5 2.5 2 2.25 2.5 1 2 2.5 2.5 2 ...
##  $ sqft_living  : int  1340 3650 1930 2000 1940 880 1350 2710 2430 1520 ...
##  $ sqft_lot     : int  7912 9050 11947 8030 10500 6380 2560 35868 88426 6200 ...
##  $ floors       : num  1.5 2 1 1 1 1 1 2 1 1.5 ...
##  $ waterfront   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ view         : int  0 4 0 0 0 0 0 0 0 0 ...
##  $ condition    : int  3 5 4 4 4 3 3 3 4 3 ...
##  $ sqft_above   : int  1340 3370 1930 1000 1140 880 1350 2710 1570 1520 ...
##  $ sqft_basement: int  0 280 0 1000 800 0 0 0 860 0 ...
##  $ yr_built     : int  1955 1921 1966 1963 1976 1938 1976 1989 1985 1945 ...
##  $ yr_renovated : int  2005 0 0 0 1992 1994 0 0 0 2010 ...
##  $ street       : chr  "18810 Densmore Ave N" "709 W Blaine St" "26206-26214 143rd Ave SE" "857 170th Pl NE" ...
##  $ city         : chr  "Shoreline" "Seattle" "Kent" "Bellevue" ...
##  $ statezip     : chr  "WA 98133" "WA 98119" "WA 98042" "WA 98008" ...
##  $ country      : chr  "USA" "USA" "USA" "USA" ...

##C. Ringkasan Statistik #C.1. Ringkasan Statistik Titanic

# Per-column summary: level counts for the factors, quartiles for `Freq`.
a1 %>% summary()
##   Class       Sex        Age     Survived      Freq       
##  1st :8   Male  :16   Child:16   No :16   Min.   :  0.00  
##  2nd :8   Female:16   Adult:16   Yes:16   1st Qu.:  0.75  
##  3rd :8                                   Median : 13.50  
##  Crew:8                                   Mean   : 68.78  
##                                           3rd Qu.: 77.00  
##                                           Max.   :670.00

#C.2. Ringkasan Statistik House Price

# Per-column summary of the house-price table.
a2 %>% summary()
##      date               price             bedrooms       bathrooms    
##  Length:4600        Min.   :       0   Min.   :0.000   Min.   :0.000  
##  Class :character   1st Qu.:  322875   1st Qu.:3.000   1st Qu.:1.750  
##  Mode  :character   Median :  460944   Median :3.000   Median :2.250  
##                     Mean   :  551963   Mean   :3.401   Mean   :2.161  
##                     3rd Qu.:  654963   3rd Qu.:4.000   3rd Qu.:2.500  
##                     Max.   :26590000   Max.   :9.000   Max.   :8.000  
##   sqft_living       sqft_lot           floors        waterfront      
##  Min.   :  370   Min.   :    638   Min.   :1.000   Min.   :0.000000  
##  1st Qu.: 1460   1st Qu.:   5001   1st Qu.:1.000   1st Qu.:0.000000  
##  Median : 1980   Median :   7683   Median :1.500   Median :0.000000  
##  Mean   : 2139   Mean   :  14853   Mean   :1.512   Mean   :0.007174  
##  3rd Qu.: 2620   3rd Qu.:  11001   3rd Qu.:2.000   3rd Qu.:0.000000  
##  Max.   :13540   Max.   :1074218   Max.   :3.500   Max.   :1.000000  
##       view          condition       sqft_above   sqft_basement   
##  Min.   :0.0000   Min.   :1.000   Min.   : 370   Min.   :   0.0  
##  1st Qu.:0.0000   1st Qu.:3.000   1st Qu.:1190   1st Qu.:   0.0  
##  Median :0.0000   Median :3.000   Median :1590   Median :   0.0  
##  Mean   :0.2407   Mean   :3.452   Mean   :1827   Mean   : 312.1  
##  3rd Qu.:0.0000   3rd Qu.:4.000   3rd Qu.:2300   3rd Qu.: 610.0  
##  Max.   :4.0000   Max.   :5.000   Max.   :9410   Max.   :4820.0  
##     yr_built     yr_renovated       street              city          
##  Min.   :1900   Min.   :   0.0   Length:4600        Length:4600       
##  1st Qu.:1951   1st Qu.:   0.0   Class :character   Class :character  
##  Median :1976   Median :   0.0   Mode  :character   Mode  :character  
##  Mean   :1971   Mean   : 808.6                                        
##  3rd Qu.:1997   3rd Qu.:1999.0                                        
##  Max.   :2014   Max.   :2014.0                                        
##    statezip           country         
##  Length:4600        Length:4600       
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
## 

##D. Missing Value #D.1. Missing Value Titanic

# Count the missing values in each column of the Titanic table.
a1 %>%
  is.na() %>%
  colSums()
##    Class      Sex      Age Survived     Freq 
##        0        0        0        0        0

#D.2. Missing Value House Price

# Count the missing values in each column of the house-price table.
a2 %>%
  is.na() %>%
  colSums()
##          date         price      bedrooms     bathrooms   sqft_living 
##             0             0             0             0             0 
##      sqft_lot        floors    waterfront          view     condition 
##             0             0             0             0             0 
##    sqft_above sqft_basement      yr_built  yr_renovated        street 
##             0             0             0             0             0 
##          city      statezip       country 
##             0             0             0

##E. Memilih Kolom Tertentu #E.1. Memilih Kolom Tertentu di Titanic

# Keep only the `Class` and `Age` columns of the Titanic table.
pilihdata1 <- a1 %>% select(Class, Age)
head(pilihdata1, n = 6L)
##   Class   Age
## 1   1st Child
## 2   2nd Child
## 3   3rd Child
## 4  Crew Child
## 5   1st Child
## 6   2nd Child

#E.2. Memilih Kolom Tertentu di House Price

# Keep only the `date`, `floors` and `country` columns of the house-price
# table.
pilihdata2 <- a2 %>% select(date, floors, country)
head(pilihdata2, n = 6L)
##            date floors country
## 1 2/5/2014 0:00    1.5     USA
## 2 2/5/2014 0:00    2.0     USA
## 3 2/5/2014 0:00    1.0     USA
## 4 2/5/2014 0:00    1.0     USA
## 5 2/5/2014 0:00    1.0     USA
## 6 2/5/2014 0:00    1.0     USA

##F. Filter dan Sortir Data #F.1. Filter Titanic

# Keep the combinations whose count exceeds 50, then display them from the
# largest count down. Base indexing is used for the sort so the printed row
# names still point at positions in `filter1`.
filter1 <- a1 %>% filter(Freq > 50)
filter1[order(-filter1[["Freq"]]), ]
##    Class    Sex   Age Survived Freq
## 4   Crew   Male Adult       No  670
## 3    3rd   Male Adult       No  387
## 8   Crew   Male Adult      Yes  192
## 2    2nd   Male Adult       No  154
## 9    1st Female Adult      Yes  140
## 1    1st   Male Adult       No  118
## 5    3rd Female Adult       No   89
## 10   2nd Female Adult      Yes   80
## 11   3rd Female Adult      Yes   76
## 7    3rd   Male Adult      Yes   75
## 6    1st   Male Adult      Yes   57
# Preview the filtered Titanic rows in their unsorted order.
head(filter1, n = 6L)
##   Class    Sex   Age Survived Freq
## 1   1st   Male Adult       No  118
## 2   2nd   Male Adult       No  154
## 3   3rd   Male Adult       No  387
## 4  Crew   Male Adult       No  670
## 5   3rd Female Adult       No   89
## 6   1st   Male Adult      Yes   57

#F.2. Filter House Price

# Narrow the houses in two steps: at least 3 bedrooms, then at least
# 2000 sqft of living area.
filter2 <- a2 %>% filter(bedrooms >= 3)
filter21 <- filter2 %>% filter(sqft_living >= 2000)
# Largest living area first, keeping three columns; base indexing keeps the
# row names as positions in `filter21`.
filter212 <- filter21[
  order(-filter21[["sqft_living"]]),
  c("statezip", "bedrooms", "sqft_living")
]
head(filter212, n = 6L)
##      statezip bedrooms sqft_living
## 61   WA 98053        7       13540
## 1068 WA 98004        5       10040
## 1243 WA 98040        5        9640
## 1088 WA 98177        5        8670
## 2198 WA 98006        5        8020
## 1979 WA 98058        5        7320
# Houses with at least 3 bedrooms and at least 2000 sqft of living area,
# ordered from the smallest living area up. Built from `a2` in a single
# pipeline so the intermediate tables are not re-created here; conditions
# passed together to filter() are combined with a logical AND.
filter22 <- a2 %>%
  filter(bedrooms >= 3, sqft_living >= 2000) %>%
  arrange(sqft_living)
head(filter22)
##            date  price bedrooms bathrooms sqft_living sqft_lot floors
## 1 2/5/2014 0:00 420000        3      2.25        2000     8030    1.0
## 2 6/5/2014 0:00 561000        3      2.00        2000     7000    2.0
## 3 6/5/2014 0:00 284000        4      2.50        2000     5390    2.0
## 4 6/5/2014 0:00 513000        4      2.50        2000     5684    2.0
## 5 8/5/2014 0:00 228000        4      1.75        2000     6120    1.0
## 6 9/5/2014 0:00 536500        4      1.75        2000     4000    1.5
##   waterfront view condition sqft_above sqft_basement yr_built yr_renovated
## 1          0    0         4       1000          1000     1963            0
## 2          0    0         3       2000             0     1916         1986
## 3          0    0         3       2000             0     2003            0
## 4          0    0         3       2000             0     1996            0
## 5          0    0         3       1100           900     1965         1993
## 6          0    0         5       1450           550     1926            0
##                   street     city statezip country
## 1        857 170th Pl NE Bellevue WA 98008     USA
## 2   6422 Marshall Ave SW  Seattle WA 98136     USA
## 3     25434 160th Ave SE     Kent WA 98042     USA
## 4       9041 NE 160th Pl  Kenmore WA 98028     USA
## 5     5933 S Eastwood Dr  Seattle WA 98178     USA
## 6 4127 Fauntleroy Way SW  Seattle WA 98126     USA

##G. Rename

# Copy of the Titanic table with the `Age` column renamed to `Usia`.
rename1 <- a1 %>% rename(Usia = Age)
head(rename1, n = 6L)
##   Class    Sex  Usia Survived Freq
## 1   1st   Male Child       No    0
## 2   2nd   Male Child       No    0
## 3   3rd   Male Child       No   35
## 4  Crew   Male Child       No    0
## 5   1st Female Child       No    0
## 6   2nd Female Child       No    0

##H. Mutate

# Add a `kondisi` label: "Bagus" when `condition` is above 3, otherwise
# "Kurang".
mutate2 <- a2 %>%
  mutate(kondisi = ifelse(condition > 3, "Bagus", "Kurang"))
head(mutate2, n = 6L)
##            date   price bedrooms bathrooms sqft_living sqft_lot floors
## 1 2/5/2014 0:00  313000        3      1.50        1340     7912    1.5
## 2 2/5/2014 0:00 2384000        5      2.50        3650     9050    2.0
## 3 2/5/2014 0:00  342000        3      2.00        1930    11947    1.0
## 4 2/5/2014 0:00  420000        3      2.25        2000     8030    1.0
## 5 2/5/2014 0:00  550000        4      2.50        1940    10500    1.0
## 6 2/5/2014 0:00  490000        2      1.00         880     6380    1.0
##   waterfront view condition sqft_above sqft_basement yr_built yr_renovated
## 1          0    0         3       1340             0     1955         2005
## 2          0    4         5       3370           280     1921            0
## 3          0    0         4       1930             0     1966            0
## 4          0    0         4       1000          1000     1963            0
## 5          0    0         4       1140           800     1976         1992
## 6          0    0         3        880             0     1938         1994
##                     street      city statezip country kondisi
## 1     18810 Densmore Ave N Shoreline WA 98133     USA  Kurang
## 2          709 W Blaine St   Seattle WA 98119     USA   Bagus
## 3 26206-26214 143rd Ave SE      Kent WA 98042     USA   Bagus
## 4          857 170th Pl NE  Bellevue WA 98008     USA   Bagus
## 5        9105 170th Ave NE   Redmond WA 98052     USA   Bagus
## 6           522 NE 88th St   Seattle WA 98115     USA  Kurang