This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
# Example chunk from the notebook template: plot the `cars` data set.
plot(cars)
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).
The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.
# Per-column summary of the data; the recorded output reports 177 NA's in Age.
summary(titanic_data)
PassengerId Survived Pclass
Min. : 1.0 Min. :0.0000 Min. :1.000
1st Qu.:223.5 1st Qu.:0.0000 1st Qu.:2.000
Median :446.0 Median :0.0000 Median :3.000
Mean :446.0 Mean :0.3838 Mean :2.309
3rd Qu.:668.5 3rd Qu.:1.0000 3rd Qu.:3.000
Max. :891.0 Max. :1.0000 Max. :3.000
Name Sex
Length:891 Length:891
Class :character Class :character
Mode :character Mode :character
Age SibSp Parch
Min. : 0.42 Min. :0.000 Min. :0.0000
1st Qu.:20.12 1st Qu.:0.000 1st Qu.:0.0000
Median :28.00 Median :0.000 Median :0.0000
Mean :29.70 Mean :0.523 Mean :0.3816
3rd Qu.:38.00 3rd Qu.:1.000 3rd Qu.:0.0000
Max. :80.00 Max. :8.000 Max. :6.0000
NA's :177
Ticket Fare
Length:891 Min. : 0.00
Class :character 1st Qu.: 7.91
Mode :character Median : 14.45
Mean : 32.20
3rd Qu.: 31.00
Max. :512.33
Cabin Embarked
Length:891 Length:891
Class :character Class :character
Mode :character Mode :character
# Show the structure of the data: dimensions, column types and first values.
str(titanic_data)
tibble [891 x 12] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
$ PassengerId: num [1:891] 1 2 3 4 5 6 7 8 9 10 ...
$ Survived : num [1:891] 0 1 1 1 0 0 0 0 1 1 ...
$ Pclass : num [1:891] 3 1 3 1 3 3 1 3 3 2 ...
$ Name : chr [1:891] "Braund, Mr. Owen Harris" "Cumings, Mrs. John Bradley (Florence Briggs Thayer)" "Heikkinen, Miss. Laina" "Futrelle, Mrs. Jacques Heath (Lily May Peel)" ...
$ Sex : chr [1:891] "male" "female" "female" "female" ...
$ Age : num [1:891] 22 38 26 35 35 NA 54 2 27 14 ...
$ SibSp : num [1:891] 1 1 0 1 0 0 0 3 0 1 ...
$ Parch : num [1:891] 0 0 0 0 0 0 0 1 2 0 ...
$ Ticket : chr [1:891] "A/5 21171" "PC 17599" "STON/O2. 3101282" "113803" ...
$ Fare : num [1:891] 7.25 71.28 7.92 53.1 8.05 ...
$ Cabin : chr [1:891] NA "C85" NA "C123" ...
$ Embarked : chr [1:891] "S" "C" "S" "S" ...
- attr(*, "spec")=
.. cols(
.. PassengerId = col_double(),
.. Survived = col_double(),
.. Pclass = col_double(),
.. Name = col_character(),
.. Sex = col_character(),
.. Age = col_double(),
.. SibSp = col_double(),
.. Parch = col_double(),
.. Ticket = col_character(),
.. Fare = col_double(),
.. Cabin = col_character(),
.. Embarked = col_character()
.. )
select a column
# First five values of the PassengerId column.
head(titanic_data$PassengerId, n = 5)
[1] 1 2 3 4 5
# Last rows of the data (n = 6 is the default, spelled out here).
tail(titanic_data, n = 6)
Select a single row, a range of rows, a single cell, and the first values of one column.
# Row 1, all columns.
titanic_data[1, ]
# Rows 1 through 10, all columns.
titanic_data[seq_len(10), ]
# Row 3, column 4 (Name is the fourth column in the str() listing).
titanic_data[3, 4]
# First five passenger names as a character vector.
head(titanic_data$Name, n = 5)
[1] "Braund, Mr. Owen Harris"
[2] "Cumings, Mrs. John Bradley (Florence Briggs Thayer)"
[3] "Heikkinen, Miss. Laina"
[4] "Futrelle, Mrs. Jacques Heath (Lily May Peel)"
[5] "Allen, Mr. William Henry"
The `table()` command tabulates one variable, or cross-tabulates one variable against another.
Get the number of people who survived and the number of people who did not survive.
# Count passengers per value of Survived
# (presumably 0 = did not survive, 1 = survived; confirm against the data source).
table(titanic_data$Survived)
0 1
549 342
Proportion of people who survived and who died.
# Share of passengers per value of Survived.
# The counts get a descriptive name instead of `t`, which shadows the name of
# base R's transpose function t().
survival_counts <- table(titanic_data$Survived)
prop.table(survival_counts)
0 1
0.6161616 0.3838384
# Cross-tabulate Sex (rows) against Survived (columns).
# The name `t` is kept because a later prop.table() call reads it again.
t <- table(titanic_data$Sex, titanic_data$Survived)
# margin = 1: proportions are computed within each row, so each sex sums to 1.
prop.table(t, margin = 1)
0 1
female 0.2579618 0.7420382
male 0.8110919 0.1889081
# margin = 2: proportions are computed within each column, so each Survived
# value sums to 1 across the sexes.
prop.table(t, margin = 2)
0 1
female 0.1475410 0.6812865
male 0.8524590 0.3187135
Data manipulation using dplyr
# Mean Fare within each passenger class.
titanic_data %>%
  group_by(Pclass) %>%
  summarise(mean_Price = mean(Fare))
`summarise()` ungrouping output (override with `.groups` argument)
# Name and Age of the first ten rows after sorting by Age, oldest first.
titanic_data %>%
  select(Name, Age) %>%
  arrange(desc(Age)) %>%
  head(10)
# Number of distinct passenger names for each Sex / Pclass combination.
titanic_data %>%
  group_by(Sex, Pclass) %>%
  summarise(count = n_distinct(Name))
`summarise()` regrouping output by 'Sex' (override with `.groups` argument)
# Same per-group count of distinct names, plus a derived column holding half
# of that count.
titanic_data %>%
  group_by(Sex, Pclass) %>%
  summarise(count = n_distinct(Name)) %>%
  mutate(count_2 = count / 2)
`summarise()` regrouping output by 'Sex' (override with `.groups` argument)
# Distinct-name counts per Sex / Pclass, reshaped to one row per Pclass with
# one column per Sex.
# `.groups = "drop"` removes the leftover Sex grouping before reshaping (and
# silences the regrouping message). pivot_wider() replaces the superseded
# spread().
titanic_data %>%
  group_by(Sex, Pclass) %>%
  summarise(count = n_distinct(Name), .groups = "drop") %>%
  pivot_wider(names_from = Sex, values_from = count)
`summarise()` regrouping output by 'Sex' (override with `.groups` argument)