# Display the Titanic table (counts by Class, Sex, Age and Survived)
print(Titanic)
## , , Age = Child, Survived = No
##
## Sex
## Class Male Female
## 1st 0 0
## 2nd 0 0
## 3rd 35 17
## Crew 0 0
##
## , , Age = Adult, Survived = No
##
## Sex
## Class Male Female
## 1st 118 4
## 2nd 154 13
## 3rd 387 89
## Crew 670 3
##
## , , Age = Child, Survived = Yes
##
## Sex
## Class Male Female
## 1st 5 1
## 2nd 11 13
## 3rd 13 14
## Crew 0 0
##
## , , Age = Adult, Survived = Yes
##
## Sex
## Class Male Female
## 1st 57 140
## 2nd 14 80
## 3rd 75 76
## Crew 192 20
# Flatten the 4-way Titanic table into a data frame: one row per
# Class/Sex/Age/Survived combination, with the cell count stored in Freq
titanic <- as.data.frame(Titanic)
# Read the house price CSV from the working directory and preview the
# first six rows
databaru <- read.csv(file = "house_price.csv")
head(databaru, n = 6L)
## date price bedrooms bathrooms sqft_living sqft_lot floors
## 1 2014-05-02 00:00:00 313000 3 1.50 1340 7912 1.5
## 2 2014-05-02 00:00:00 2384000 5 2.50 3650 9050 2.0
## 3 2014-05-02 00:00:00 342000 3 2.00 1930 11947 1.0
## 4 2014-05-02 00:00:00 420000 3 2.25 2000 8030 1.0
## 5 2014-05-02 00:00:00 550000 4 2.50 1940 10500 1.0
## 6 2014-05-02 00:00:00 490000 2 1.00 880 6380 1.0
## waterfront view condition sqft_above sqft_basement yr_built yr_renovated
## 1 0 0 3 1340 0 1955 2005
## 2 0 4 5 3370 280 1921 0
## 3 0 0 4 1930 0 1966 0
## 4 0 0 4 1000 1000 1963 0
## 5 0 0 4 1140 800 1976 1992
## 6 0 0 3 880 0 1938 1994
## street city statezip country
## 1 18810 Densmore Ave N Shoreline WA 98133 USA
## 2 709 W Blaine St Seattle WA 98119 USA
## 3 26206-26214 143rd Ave SE Kent WA 98042 USA
## 4 857 170th Pl NE Bellevue WA 98008 USA
## 5 9105 170th Ave NE Redmond WA 98052 USA
## 6 522 NE 88th St Seattle WA 98115 USA
# Inspect the structure of the Titanic table (dimensions and dimnames)
utils::str(Titanic)
## 'table' num [1:4, 1:2, 1:2, 1:2] 0 0 35 0 0 0 17 0 118 154 ...
## - attr(*, "dimnames")=List of 4
## ..$ Class : chr [1:4] "1st" "2nd" "3rd" "Crew"
## ..$ Sex : chr [1:2] "Male" "Female"
## ..$ Age : chr [1:2] "Child" "Adult"
## ..$ Survived: chr [1:2] "No" "Yes"
# Summarise the table: number of cases, number of factors, and a
# chi-squared test for independence of all factors
summary(object = Titanic)
## Number of cases in table: 2201
## Number of factors: 4
## Test for independence of all factors:
## Chisq = 1637.4, df = 25, p-value = 0
## Chi-squared approximation may be incorrect
# Count missing values per column. This is run on the data frame `titanic`
# rather than the 4-D table `Titanic`: on an array, colSums() only sums over
# the first dimension (Class) and returns a Sex x Age x Survived array
# instead of one count per variable.
colSums(is.na(titanic))
## Class Sex Age Survived Freq
## 0 0 0 0 0
# Missing values per column before imputation
colSums(is.na(airquality))
## Ozone Solar.R Wind Temp Month Day
## 37 7 0 0 0 0
# Fill the gaps in Ozone and Solar.R with each column's own median
# (the median is computed on the non-missing values only)
airquality[c("Ozone", "Solar.R")] <- lapply(
  airquality[c("Ozone", "Solar.R")],
  function(values) {
    replace(values, is.na(values), median(values, na.rm = TRUE))
  }
)
# Missing values per column after imputation
colSums(is.na(airquality))
## Ozone Solar.R Wind Temp Month Day
## 0 0 0 0 0 0
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Keep only the Class, Sex and Survived columns
titanic_selected <- titanic %>%
  select(Class, Sex, Survived)
head(titanic_selected, n = 6L)
## Class Sex Survived
## 1 1st Male No
## 2 2nd Male No
## 3 3rd Male No
## 4 Crew Male No
## 5 1st Female No
## 6 2nd Female No
library(dplyr)
# Rows describing children only
titanic_child <- titanic %>%
  filter(Age == "Child")
# All rows ordered from the smallest to the largest count
titanic_sorted_asc <- titanic %>%
  arrange(Freq)
head(titanic_sorted_asc, n = 6L)
## Class Sex Age Survived Freq
## 1 1st Male Child No 0
## 2 2nd Male Child No 0
## 3 Crew Male Child No 0
## 4 1st Female Child No 0
## 5 2nd Female Child No 0
## 6 Crew Female Child No 0
# All rows ordered from the largest to the smallest count
titanic_sorted_desc <- titanic %>%
  arrange(desc(Freq))
head(titanic_sorted_desc, n = 6L)
## Class Sex Age Survived Freq
## 1 Crew Male Adult No 670
## 2 3rd Male Adult No 387
## 3 Crew Male Adult Yes 192
## 4 2nd Male Adult No 154
## 5 1st Female Adult Yes 140
## 6 1st Male Adult No 118
# Rename the Age column to Umur; all other columns are left unchanged
titanic_rename <- titanic %>%
  rename(Umur = Age)
titanic_rename
## Class Sex Umur Survived Freq
## 1 1st Male Child No 0
## 2 2nd Male Child No 0
## 3 3rd Male Child No 35
## 4 Crew Male Child No 0
## 5 1st Female Child No 0
## 6 2nd Female Child No 0
## 7 3rd Female Child No 17
## 8 Crew Female Child No 0
## 9 1st Male Adult No 118
## 10 2nd Male Adult No 154
## 11 3rd Male Adult No 387
## 12 Crew Male Adult No 670
## 13 1st Female Adult No 4
## 14 2nd Female Adult No 13
## 15 3rd Female Adult No 89
## 16 Crew Female Adult No 3
## 17 1st Male Child Yes 5
## 18 2nd Male Child Yes 11
## 19 3rd Male Child Yes 13
## 20 Crew Male Child Yes 0
## 21 1st Female Child Yes 1
## 22 2nd Female Child Yes 13
## 23 3rd Female Child Yes 14
## 24 Crew Female Child Yes 0
## 25 1st Male Adult Yes 57
## 26 2nd Male Adult Yes 14
## 27 3rd Male Adult Yes 75
## 28 Crew Male Adult Yes 192
## 29 1st Female Adult Yes 140
## 30 2nd Female Adult Yes 80
## 31 3rd Female Adult Yes 76
## 32 Crew Female Adult Yes 20
# Add Proporsi: each row's count as a share of the total count
titanic_mutate <- titanic %>%
  mutate(Proporsi = prop.table(Freq))
head(titanic_mutate, n = 6L)
## Class Sex Age Survived Freq Proporsi
## 1 1st Male Child No 0 0.00000000
## 2 2nd Male Child No 0 0.00000000
## 3 3rd Male Child No 35 0.01590186
## 4 Crew Male Child No 0 0.00000000
## 5 1st Female Child No 0 0.00000000
## 6 2nd Female Child No 0 0.00000000
# Lookup table with one row per Class value, carrying the ship name
extra <- data.frame(
  Class = c("1st", "2nd", "3rd", "Crew"),
  Kapal = rep("Titanic", times = 4)
)
# Attach Kapal to every row of titanic by matching on Class
titanic_joined <- titanic %>%
  left_join(extra, by = "Class")
head(titanic_joined, n = 6L)
## Class Sex Age Survived Freq Kapal
## 1 1st Male Child No 0 Titanic
## 2 2nd Male Child No 0 Titanic
## 3 3rd Male Child No 35 Titanic
## 4 Crew Male Child No 0 Titanic
## 5 1st Female Child No 0 Titanic
## 6 2nd Female Child No 0 Titanic
# Total count per Class/Survived combination.
# `.groups = "drop"` returns an ungrouped tibble, so later verbs are not
# silently applied per Class, and it suppresses the "grouped output"
# message that summarise() prints when `.groups` is left unspecified.
titanic_summary <- titanic %>%
  group_by(Class, Survived) %>%
  summarise(total = sum(Freq), .groups = "drop")
titanic_summary
## # A tibble: 8 × 3
## Class Survived total
## <fct> <fct> <dbl>
## 1 1st No 122
## 2 1st Yes 203
## 3 2nd No 167
## 4 2nd Yes 118
## 5 3rd No 528
## 6 3rd Yes 178
## 7 Crew No 673
## 8 Crew Yes 212
# Reproducible 70/30 split of the rows of titanic.
# NOTE(review): each row of titanic is a Class/Sex/Age/Survived cell with a
# count in Freq, so this splits table cells, not individual passengers.
set.seed(123)
# seq_len() stays correct for a zero-row input (1:0 would yield c(1, 0));
# floor() makes the truncation of the fractional sample size explicit.
index <- sample(seq_len(nrow(titanic)), size = floor(0.7 * nrow(titanic)))
train_data <- titanic[index, ]
# Select the complement by position: titanic[-index, ] would return zero
# rows (instead of all rows) if index were empty.
test_data <- titanic[setdiff(seq_len(nrow(titanic)), index), ]
nrow(train_data)
nrow(test_data)
## [1] 22
## [1] 10