This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.
# Per-column overview of the data: min/quartiles/mean/max (plus NA count) for
# numeric columns, and length/class/mode for character columns.
summary(titanic)
PassengerId Survived Pclass
Min. : 1.0 Min. :0.0000 Min. :1.000
1st Qu.:223.5 1st Qu.:0.0000 1st Qu.:2.000
Median :446.0 Median :0.0000 Median :3.000
Mean :446.0 Mean :0.3838 Mean :2.309
3rd Qu.:668.5 3rd Qu.:1.0000 3rd Qu.:3.000
Max. :891.0 Max. :1.0000 Max. :3.000
Name Sex Age
Length:891 Length:891 Min. : 0.42
Class :character Class :character 1st Qu.:20.12
Mode :character Mode :character Median :28.00
Mean :29.70
3rd Qu.:38.00
Max. :80.00
NA's :177
SibSp Parch Ticket
Min. :0.000 Min. :0.0000 Length:891
1st Qu.:0.000 1st Qu.:0.0000 Class :character
Median :0.000 Median :0.0000 Mode :character
Mean :0.523 Mean :0.3816
3rd Qu.:1.000 3rd Qu.:0.0000
Max. :8.000 Max. :6.0000
Fare Cabin Embarked
Min. : 0.00 Length:891 Length:891
1st Qu.: 7.91 Class :character Class :character
Median : 14.45 Mode :character Mode :character
Mean : 32.20
3rd Qu.: 31.00
Max. :512.33
# Compact structure listing: dimensions, each column's type, and its first few
# values, followed by the column specification attached to the tibble.
str(titanic)
tibble [891 × 12] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
$ PassengerId: num [1:891] 1 2 3 4 5 6 7 8 9 10 ...
$ Survived : num [1:891] 0 1 1 1 0 0 0 0 1 1 ...
$ Pclass : num [1:891] 3 1 3 1 3 3 1 3 3 2 ...
$ Name : chr [1:891] "Braund, Mr. Owen Harris" "Cumings, Mrs. John Bradley (Florence Briggs Thayer)" "Heikkinen, Miss. Laina" "Futrelle, Mrs. Jacques Heath (Lily May Peel)" ...
$ Sex : chr [1:891] "male" "female" "female" "female" ...
$ Age : num [1:891] 22 38 26 35 35 NA 54 2 27 14 ...
$ SibSp : num [1:891] 1 1 0 1 0 0 0 3 0 1 ...
$ Parch : num [1:891] 0 0 0 0 0 0 0 1 2 0 ...
$ Ticket : chr [1:891] "A/5 21171" "PC 17599" "STON/O2. 3101282" "113803" ...
$ Fare : num [1:891] 7.25 71.28 7.92 53.1 8.05 ...
$ Cabin : chr [1:891] NA "C85" NA "C123" ...
$ Embarked : chr [1:891] "S" "C" "S" "S" ...
- attr(*, "spec")=
.. cols(
.. PassengerId = col_double(),
.. Survived = col_double(),
.. Pclass = col_double(),
.. Name = col_character(),
.. Sex = col_character(),
.. Age = col_double(),
.. SibSp = col_double(),
.. Parch = col_double(),
.. Ticket = col_character(),
.. Fare = col_double(),
.. Cabin = col_character(),
.. Embarked = col_character()
.. )
Select a column
# Preview the first rows of the whole table.
head(titanic)
# Extract a single column as a vector and show its first five values.
head(titanic[["PassengerId"]], n = 5)
[1] 1 2 3 4 5
# Show the last rows of the table (the counterpart of head()).
tail(titanic)
Select a single row in titanic
# Row index 1, all columns (empty column index keeps every column).
titanic[1, ]
The `table()` command counts the occurrences of each value of one variable, or cross-tabulates one variable against another. Here we get the number of people who survived and the number of people who did not survive.
# Frequency count of each distinct value in the Survived column.
table(titanic$Survived)
0 1
549 342
# Store the survival counts, then express each count as a share of the total.
t <- table(titanic[["Survived"]])
prop.table(t)
0 1
0.6161616 0.3838384
# Cross-tabulate Sex (rows) against Survived (columns).
t <- table(titanic[["Sex"]], titanic[["Survived"]])
# margin = 1 normalises within rows: for each sex, the share with
# Survived = 0 versus Survived = 1 (each row sums to 1).
prop.table(t, margin = 1)
0 1
female 0.2579618 0.7420382
male 0.8110919 0.1889081
# margin = 2 normalises within columns of the Sex-by-Survived table `t`:
# for each Survived value, the share that is female versus male
# (each column sums to 1).
prop.table(t,margin = 2)
0 1
female 0.1475410 0.6812865
male 0.8524590 0.3187135
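Data manipulation using dplyr. Load the package with `library(dplyr)` (or `require(dplyr)`). `%>%` is the pipe operator: it passes the result of the expression on its left as the first argument to the function on its right.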
# Mean ticket fare within each passenger class.
titanic %>%
  group_by(Pclass) %>%
  summarise(avg_price = mean(Fare))
# Keep only name and age, sort from oldest to youngest, and show the top ten.
titanic %>%
  select(Name, Age) %>%
  arrange(desc(Age)) %>%
  head(10)
`mutate()` adds a new column computed from columns that already exist; here it works on the `count` column created by `summarise()` in the previous step.
# Number of distinct passenger names in each Sex/Pclass combination, plus a
# derived column holding half of that count.
titanic %>%
  group_by(Sex, Pclass) %>%
  summarise(count = n_distinct(Name)) %>%
  mutate(count_2 = count / 2)
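Reshape from long to wide: spread the values of the Sex variable into separate columns, one column per sex, filled with the corresponding counts.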
# Number of distinct passenger names per Sex/Pclass combination, reshaped to
# wide form: one row per Pclass and one column per value of Sex.
titanic %>%
  group_by(Sex, Pclass) %>%
  summarise(count = n_distinct(Name)) %>%
  # Drop the leftover grouping so the reshape works on a plain tibble.
  ungroup() %>%
  # pivot_wider() is the maintained successor of the superseded spread():
  # column names come from Sex, cell values from count.
  pivot_wider(names_from = Sex, values_from = count)