Data Analyst

danzarchive

library('dplyr')
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library('tidyr')
library('readr')
library('stringr')
library('forcats')
library('modelr')

Material Praktikum

Lorem ipsum dolor sit amet. Sit similique omnis a quam nihil aut inventore ipsum in rerum internos. Qui voluptatem iusto hic odit deserunt est deserunt vero eos voluptate omnis At dolor repudiandae sed cupiditate porro. Et consectetur eaque aut iste officia a omnis animi.

library(ggplot2)

# Load the Netflix catalogue and define the palette used for the fills.
netflix <- read.csv("netflix_titles.csv")
custom_col <- c("#776B5D", "#B0A695", "#EBE3D5")

# Pie chart of titles by `type`: a single stacked bar (x = "") of width 1,
# wrapped into polar coordinates along y so each slice's angle is its count.
pie <- ggplot(netflix, aes(x = "", fill = factor(type))) +
  geom_bar(width = 1) +
  theme(
    axis.line = element_blank(),
    plot.title = element_text(hjust = 0.5, size = 22)
  ) +
  labs(
    fill = "class",
    x = NULL,
    y = NULL,
    # Was misspelled `tittle`, which labs() does not treat as the plot title,
    # so the chart rendered without one despite the plot.title theme above.
    title = "Pie Chart of Netflix shows"
  ) +
  coord_polar(theta = "y", start = 0) +
  scale_fill_manual(values = custom_col)

# The same stacked bar without the polar transform, printed directly.
ggplot(netflix, aes(x = "", fill = factor(type))) +
  geom_bar(width = 1) +
  scale_fill_manual(values = custom_col)

# Print the pie chart built above.
pie

Data Analyst Train Information

Lorem ipsum dolor sit amet. Sit similique omnis a quam nihil aut inventore ipsum in rerum internos. Qui voluptatem iusto hic odit deserunt est deserunt vero eos voluptate omnis At dolor repudiandae sed cupiditate porro. Et consectetur eaque aut iste officia a omnis animi.

# Read the training data from the CSV file in the working directory.
train <- read.csv(file = "train (1).csv")

# Preview the first six rows (the default for head()).
head(train, n = 6L)
##   PassengerId Survived Pclass
## 1           1        0      3
## 2           2        1      1
## 3           3        1      3
## 4           4        1      1
## 5           5        0      3
## 6           6        0      3
##                                                  Name    Sex Age SibSp Parch
## 1                             Braund, Mr. Owen Harris   male  22     1     0
## 2 Cumings, Mrs. John Bradley (Florence Briggs Thayer) female  38     1     0
## 3                              Heikkinen, Miss. Laina female  26     0     0
## 4        Futrelle, Mrs. Jacques Heath (Lily May Peel) female  35     1     0
## 5                            Allen, Mr. William Henry   male  35     0     0
## 6                                    Moran, Mr. James   male  NA     0     0
##             Ticket    Fare Cabin Embarked
## 1        A/5 21171  7.2500              S
## 2         PC 17599 71.2833   C85        C
## 3 STON/O2. 3101282  7.9250              S
## 4           113803 53.1000  C123        S
## 5           373450  8.0500              S
## 6           330877  8.4583              Q
# Recode the categorical columns as factors in one pass; every other column
# is left untouched and column order is preserved.
train <- mutate(
  train,
  across(c(Survived, Pclass, Embarked, Sex), factor)
)

# Preview the first rows after the conversion.
head(train)
##   PassengerId Survived Pclass
## 1           1        0      3
## 2           2        1      1
## 3           3        1      3
## 4           4        1      1
## 5           5        0      3
## 6           6        0      3
##                                                  Name    Sex Age SibSp Parch
## 1                             Braund, Mr. Owen Harris   male  22     1     0
## 2 Cumings, Mrs. John Bradley (Florence Briggs Thayer) female  38     1     0
## 3                              Heikkinen, Miss. Laina female  26     0     0
## 4        Futrelle, Mrs. Jacques Heath (Lily May Peel) female  35     1     0
## 5                            Allen, Mr. William Henry   male  35     0     0
## 6                                    Moran, Mr. James   male  NA     0     0
##             Ticket    Fare Cabin Embarked
## 1        A/5 21171  7.2500              S
## 2         PC 17599 71.2833   C85        C
## 3 STON/O2. 3101282  7.9250              S
## 4           113803 53.1000  C123        S
## 5           373450  8.0500              S
## 6           330877  8.4583              Q
# Per-column overview of `train`: quartiles/mean (and NA counts) for numeric
# columns, level counts for the factor columns.
summary(train)
##   PassengerId    Survived Pclass      Name               Sex     
##  Min.   :  1.0   0:549    1:216   Length:891         female:314  
##  1st Qu.:223.5   1:342    2:184   Class :character   male  :577  
##  Median :446.0            3:491   Mode  :character               
##  Mean   :446.0                                                   
##  3rd Qu.:668.5                                                   
##  Max.   :891.0                                                   
##                                                                  
##       Age            SibSp           Parch           Ticket         
##  Min.   : 0.42   Min.   :0.000   Min.   :0.0000   Length:891        
##  1st Qu.:20.12   1st Qu.:0.000   1st Qu.:0.0000   Class :character  
##  Median :28.00   Median :0.000   Median :0.0000   Mode  :character  
##  Mean   :29.70   Mean   :0.523   Mean   :0.3816                     
##  3rd Qu.:38.00   3rd Qu.:1.000   3rd Qu.:0.0000                     
##  Max.   :80.00   Max.   :8.000   Max.   :6.0000                     
##  NA's   :177                                                        
##       Fare           Cabin           Embarked
##  Min.   :  0.00   Length:891          :  2   
##  1st Qu.:  7.91   Class :character   C:168   
##  Median : 14.45   Mode  :character   Q: 77   
##  Mean   : 32.20                      S:644   
##  3rd Qu.: 31.00                              
##  Max.   :512.33                              
## 
# Palette for the fills; Pclass has three levels, one colour each.
custom_col <- c("#776B5D", "#B0A695", "#EBE3D5")

# Pie chart of passengers by class: a single stacked bar (x = "") wrapped
# into polar coordinates along y so each slice's angle is its count.
ggplot(train, aes(x = "", fill = factor(Pclass))) +
  geom_bar(width = 1) +
  theme(
    axis.line = element_blank(),
    plot.title = element_text(hjust = 0.5, size = 22)
  ) +
  labs(
    fill = "class",
    x = NULL,
    y = NULL,
    # Was misspelled `tittle`, so no plot title was rendered.
    # NOTE(review): the title text looks truncated ("P" is presumably
    # Pclass) — confirm the intended wording.
    title = "Pie Chart of P"
  ) +
  coord_polar(theta = "y", start = 0) +
  scale_fill_manual(values = custom_col)

# Age distribution by survival outcome: one frequency polygon per Survived
# level, using one-year bins.
p_age <- ggplot(train) +
  geom_freqpoly(mapping = aes(x = Age, color = Survived), binwidth = 1) +
  theme(legend.position = "right") +
  # The lines are drawn with the colour aesthetic, so the palette has to be
  # attached to the colour scale; the previous fill scale had nothing to
  # apply to and the default hues were used instead.
  scale_color_manual(values = custom_col)

# Survival share within each sex: counts are stacked and rescaled so every
# bar has height 1.
p_sex <- ggplot(data = train, mapping = aes(x = Sex, fill = Survived)) +
  geom_bar(stat = "count", position = "fill") +
  labs(x = "Sex") +
  scale_fill_manual(values = custom_col)

# Survival share within each passenger class: counts are stacked and
# rescaled so every bar has height 1. The legend is hidden.
p_class <- ggplot(
  data = train,
  mapping = aes(x = Pclass, fill = Survived, colour = Survived)
) +
  geom_bar(stat = "count", position = "fill") +
  labs(x = "Pclass") +
  theme(legend.position = "none") +
  scale_fill_manual(values = custom_col) +
  # Bar outlines are mapped to Survived as well; give them the same palette
  # so they no longer fall back to the default hues.
  scale_colour_manual(values = custom_col)

# Survival share per embarkation value: counts are stacked and rescaled so
# every bar has height 1. The legend is hidden.
p_emb <- ggplot(data = train, mapping = aes(x = Embarked, fill = Survived)) +
  geom_bar(stat = "count", position = "fill") +
  labs(x = "Embarked") +
  theme(legend.position = "none") +
  scale_fill_manual(values = custom_col)

# Survival share per SibSp value: counts are stacked and rescaled so every
# bar has height 1. The legend is hidden.
p_sib <- ggplot(data = train, mapping = aes(x = SibSp, fill = Survived)) +
  geom_bar(stat = "count", position = "fill") +
  labs(x = "SibSp") +
  theme(legend.position = "none") +
  scale_fill_manual(values = custom_col)

# Survival share per Parch value: counts are stacked and rescaled so every
# bar has height 1. The legend is hidden.
p_par <- ggplot(data = train, mapping = aes(x = Parch, fill = Survived)) +
  geom_bar(stat = "count", position = "fill") +
  labs(x = "Parch") +
  theme(legend.position = "none") +
  scale_fill_manual(values = custom_col)

# Fare distribution by survival outcome on a log10 x axis; binwidth is
# expressed in log10 units because the scale transform is applied first.
# NOTE(review): Fare contains zeros (summary minimum is 0.00), which are
# non-finite on a log10 axis — confirm that dropping them is intended.
p_fare <- ggplot(train) +
  geom_freqpoly(mapping = aes(x = Fare, color = Survived), binwidth = 0.05) +
  scale_x_log10() +
  theme(legend.position = "none") +
  # The lines are drawn with the colour aesthetic, so the palette has to be
  # attached to the colour scale; the previous fill scale had no effect.
  scale_color_manual(values = custom_col)

# Display the plots built above: a bare object name at top level auto-prints
# the ggplot. The "Fig. 2" lines appear to be rendered figure captions, not
# code — NOTE(review): every figure carries the same number; confirm.
p_age
Fig. 2

Fig. 2

p_sex
Fig. 2

Fig. 2

p_fare
Fig. 2

Fig. 2

p_class
Fig. 2

Fig. 2

p_emb
Fig. 2

Fig. 2

p_sib
Fig. 2

Fig. 2

p_par
Fig. 2

Fig. 2