# Display the built-in `Titanic` contingency table.
print(Titanic)
## , , Age = Child, Survived = No
## 
##       Sex
## Class  Male Female
##   1st     0      0
##   2nd     0      0
##   3rd    35     17
##   Crew    0      0
## 
## , , Age = Adult, Survived = No
## 
##       Sex
## Class  Male Female
##   1st   118      4
##   2nd   154     13
##   3rd   387     89
##   Crew  670      3
## 
## , , Age = Child, Survived = Yes
## 
##       Sex
## Class  Male Female
##   1st     5      1
##   2nd    11     13
##   3rd    13     14
##   Crew    0      0
## 
## , , Age = Adult, Survived = Yes
## 
##       Sex
## Class  Male Female
##   1st    57    140
##   2nd    14     80
##   3rd    75     76
##   Crew  192     20
# Flatten the 4-way contingency table into long format: one row per
# Class/Sex/Age/Survived combination, with the cell count in `Freq`.
titanic <- as.data.frame(
  Titanic,
  responseName = "Freq",
  stringsAsFactors = TRUE
)
# Load the house-price data set (path is relative to the working directory)
# and preview its first rows.
databaru <- read.csv(file = "house_price.csv", header = TRUE)
head(databaru, n = 6L)
##                  date   price bedrooms bathrooms sqft_living sqft_lot floors
## 1 2014-05-02 00:00:00  313000        3      1.50        1340     7912    1.5
## 2 2014-05-02 00:00:00 2384000        5      2.50        3650     9050    2.0
## 3 2014-05-02 00:00:00  342000        3      2.00        1930    11947    1.0
## 4 2014-05-02 00:00:00  420000        3      2.25        2000     8030    1.0
## 5 2014-05-02 00:00:00  550000        4      2.50        1940    10500    1.0
## 6 2014-05-02 00:00:00  490000        2      1.00         880     6380    1.0
##   waterfront view condition sqft_above sqft_basement yr_built yr_renovated
## 1          0    0         3       1340             0     1955         2005
## 2          0    4         5       3370           280     1921            0
## 3          0    0         4       1930             0     1966            0
## 4          0    0         4       1000          1000     1963            0
## 5          0    0         4       1140           800     1976         1992
## 6          0    0         3        880             0     1938         1994
##                     street      city statezip country
## 1     18810 Densmore Ave N Shoreline WA 98133     USA
## 2          709 W Blaine St   Seattle WA 98119     USA
## 3 26206-26214 143rd Ave SE      Kent WA 98042     USA
## 4          857 170th Pl NE  Bellevue WA 98008     USA
## 5        9105 170th Ave NE   Redmond WA 98052     USA
## 6           522 NE 88th St   Seattle WA 98115     USA
# Inspect the structure (class, dimensions, dimnames) of the `Titanic` table.
str(object = Titanic)
##  'table' num [1:4, 1:2, 1:2, 1:2] 0 0 35 0 0 0 17 0 118 154 ...
##  - attr(*, "dimnames")=List of 4
##   ..$ Class   : chr [1:4] "1st" "2nd" "3rd" "Crew"
##   ..$ Sex     : chr [1:2] "Male" "Female"
##   ..$ Age     : chr [1:2] "Child" "Adult"
##   ..$ Survived: chr [1:2] "No" "Yes"
# Summarise the table: number of cases, number of factors and a chi-squared
# test for independence of all factors.
summary(object = Titanic)
## Number of cases in table: 2201 
## Number of factors: 4 
## Test for independence of all factors:
##  Chisq = 1637.4, df = 25, p-value = 0
##  Chi-squared approximation may be incorrect
# Count missing values per variable. Calling `colSums()` on the 4-way table
# itself only collapses the first dimension (Class) and returns a
# Sex x Age x Survived array, so the check is run on the long-format data
# frame instead, giving one count per column.
colSums(is.na(as.data.frame(Titanic)))
##    Class      Sex      Age Survived     Freq 
##        0        0        0        0        0
# Missing values per column before imputation.
colSums(x = is.na(airquality))
##   Ozone Solar.R    Wind    Temp   Month     Day 
##      37       7       0       0       0       0
# Fill the gaps in both affected columns with that column's own median
# (computed on the non-missing values).
airquality <- transform(
  airquality,
  Ozone = replace(Ozone, is.na(Ozone), median(Ozone, na.rm = TRUE)),
  Solar.R = replace(Solar.R, is.na(Solar.R), median(Solar.R, na.rm = TRUE))
)
# Re-check the missing-value counts after imputation.
colSums(x = is.na(airquality))
##   Ozone Solar.R    Wind    Temp   Month     Day 
##       0       0       0       0       0       0
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Keep only the class, sex and survival columns.
titanic_selected <- titanic %>%
  select(Class, Sex, Survived)
head(titanic_selected, n = 6L)
##   Class    Sex Survived
## 1   1st   Male       No
## 2   2nd   Male       No
## 3   3rd   Male       No
## 4  Crew   Male       No
## 5   1st Female       No
## 6   2nd Female       No
library(dplyr)
# Rows whose `Age` category is "Child".
titanic_child <- titanic %>%
  filter(Age == "Child")
# All rows ordered from the smallest to the largest cell count.
titanic_sorted_asc <- titanic %>%
  arrange(Freq)
head(titanic_sorted_asc, n = 6L)
##   Class    Sex   Age Survived Freq
## 1   1st   Male Child       No    0
## 2   2nd   Male Child       No    0
## 3  Crew   Male Child       No    0
## 4   1st Female Child       No    0
## 5   2nd Female Child       No    0
## 6  Crew Female Child       No    0
# All rows ordered from the largest to the smallest cell count.
titanic_sorted_desc <- titanic %>%
  arrange(desc(Freq))
head(titanic_sorted_desc, n = 6L)
##   Class    Sex   Age Survived Freq
## 1  Crew   Male Adult       No  670
## 2   3rd   Male Adult       No  387
## 3  Crew   Male Adult      Yes  192
## 4   2nd   Male Adult       No  154
## 5   1st Female Adult      Yes  140
## 6   1st   Male Adult       No  118
# Rename the `Age` column to `Umur` and print the full result.
titanic_rename <- titanic %>%
  rename(Umur = Age)
print(titanic_rename)
##    Class    Sex  Umur Survived Freq
## 1    1st   Male Child       No    0
## 2    2nd   Male Child       No    0
## 3    3rd   Male Child       No   35
## 4   Crew   Male Child       No    0
## 5    1st Female Child       No    0
## 6    2nd Female Child       No    0
## 7    3rd Female Child       No   17
## 8   Crew Female Child       No    0
## 9    1st   Male Adult       No  118
## 10   2nd   Male Adult       No  154
## 11   3rd   Male Adult       No  387
## 12  Crew   Male Adult       No  670
## 13   1st Female Adult       No    4
## 14   2nd Female Adult       No   13
## 15   3rd Female Adult       No   89
## 16  Crew Female Adult       No    3
## 17   1st   Male Child      Yes    5
## 18   2nd   Male Child      Yes   11
## 19   3rd   Male Child      Yes   13
## 20  Crew   Male Child      Yes    0
## 21   1st Female Child      Yes    1
## 22   2nd Female Child      Yes   13
## 23   3rd Female Child      Yes   14
## 24  Crew Female Child      Yes    0
## 25   1st   Male Adult      Yes   57
## 26   2nd   Male Adult      Yes   14
## 27   3rd   Male Adult      Yes   75
## 28  Crew   Male Adult      Yes  192
## 29   1st Female Adult      Yes  140
## 30   2nd Female Adult      Yes   80
## 31   3rd Female Adult      Yes   76
## 32  Crew Female Adult      Yes   20
# Add `Proporsi`: each row's count as a share of the total count.
titanic_mutate <- titanic %>%
  mutate(Proporsi = Freq / sum(Freq))
head(titanic_mutate, n = 6L)
##   Class    Sex   Age Survived Freq   Proporsi
## 1   1st   Male Child       No    0 0.00000000
## 2   2nd   Male Child       No    0 0.00000000
## 3   3rd   Male Child       No   35 0.01590186
## 4  Crew   Male Child       No    0 0.00000000
## 5   1st Female Child       No    0 0.00000000
## 6   2nd Female Child       No    0 0.00000000
# Lookup table mapping every passenger class to the ship name.
# `Class` is built as a factor with the same levels as `titanic$Class`:
# joining a factor key against a character key makes dplyr fall back to a
# character key (older releases also warn), which would drop the factor
# levels from the joined result.
extra <- data.frame(
  Class = factor(
    c("1st", "2nd", "3rd", "Crew"),
    levels = levels(titanic$Class)
  ),
  Kapal = rep("Titanic", 4L)
)
# Attach the ship name to every row of `titanic`, matching on `Class`.
titanic_joined <- left_join(titanic, extra, by = "Class")

head(titanic_joined)
##   Class    Sex   Age Survived Freq   Kapal
## 1   1st   Male Child       No    0 Titanic
## 2   2nd   Male Child       No    0 Titanic
## 3   3rd   Male Child       No   35 Titanic
## 4  Crew   Male Child       No    0 Titanic
## 5   1st Female Child       No    0 Titanic
## 6   2nd Female Child       No    0 Titanic
# Total count per class and survival outcome.
# `.groups = "drop"` returns an ungrouped tibble, so later verbs are not
# silently applied per class, and the "grouped output" message is no longer
# emitted.
titanic_summary <- titanic %>%
  group_by(Class, Survived) %>%
  summarise(total = sum(Freq), .groups = "drop")
titanic_summary
## # A tibble: 8 × 3
##   Class Survived total
##   <fct> <fct>    <dbl>
## 1 1st   No         122
## 2 1st   Yes        203
## 3 2nd   No         167
## 4 2nd   Yes        118
## 5 3rd   No         528
## 6 3rd   Yes        178
## 7 Crew  No         673
## 8 Crew  Yes        212
# Reproducible 70/30 split of the rows into training and test sets.
# NOTE(review): `titanic` holds one row per Class/Sex/Age/Survived cell with
# a count in `Freq`, so this splits the 32 cells rather than the individual
# passengers -- confirm that this is intended.
set.seed(123)
n_rows <- nrow(titanic)
# `sample.int()` avoids the `1:n` and length-one `sample()` pitfalls, and
# `floor()` makes the truncation of the fractional size (0.7 * 32 = 22.4)
# explicit. The drawn indices are the same as before for this seed.
index <- sample.int(n_rows, size = floor(0.7 * n_rows))

train_data <- titanic[index, ]
# Select the complement with a logical mask instead of `-index`: a negative
# subscript built from an empty index would return zero rows, not all rows.
test_data <- titanic[!(seq_len(n_rows) %in% index), ]

nrow(train_data)
## [1] 22
nrow(test_data)
## [1] 10