library(readxl)
## Warning: package 'readxl' was built under R version 4.3.3
# Read churn.xlsx (path is relative to the working directory) into `df`
# and print the first rows as a quick sanity check.
df <- read_excel("churn.xlsx")
head(df)
## # A tibble: 6 × 14
## RowNumber CustomerId Surname CreditScore Geography Gender Age Tenure Balance
## <dbl> <dbl> <chr> <dbl> <chr> <chr> <dbl> <dbl> <chr>
## 1 1 15634602 Hargra… 619 France Female NA 2 0
## 2 2 15647311 Hill 608 <NA> Female 41 1 83807.…
## 3 3 15619304 Onio 502 France Female 42 8 159660…
## 4 4 15701354 Boni 699 France Female 39 1 0
## 5 5 15737888 Mitche… 850 <NA> Female 43 2 125510…
## 6 6 15574012 Chu 645 Spain Male 44 8 113755…
## # ℹ 5 more variables: NumOfProducts <dbl>, HasCrCard <dbl>,
## # IsActiveMember <dbl>, EstimatedSalary <chr>, Exited <dbl>
# Count the missing values in each column before any cleaning.
colSums(is.na(df))
## RowNumber CustomerId Surname CreditScore Geography
## 0 0 0 0 3
## Gender Age Tenure Balance NumOfProducts
## 3 3 0 2 0
## HasCrCard IsActiveMember EstimatedSalary Exited
## 0 0 0 0
# Inspect the column types as they were read from the workbook.
str(df)
## tibble [10,000 × 14] (S3: tbl_df/tbl/data.frame)
## $ RowNumber : num [1:10000] 1 2 3 4 5 6 7 8 9 10 ...
## $ CustomerId : num [1:10000] 15634602 15647311 15619304 15701354 15737888 ...
## $ Surname : chr [1:10000] "Hargrave" "Hill" "Onio" "Boni" ...
## $ CreditScore : num [1:10000] 619 608 502 699 850 645 822 376 501 684 ...
## $ Geography : chr [1:10000] "France" NA "France" "France" ...
## $ Gender : chr [1:10000] "Female" "Female" "Female" "Female" ...
## $ Age : num [1:10000] NA 41 42 39 43 44 NA 29 44 27 ...
## $ Tenure : num [1:10000] 2 1 8 1 2 8 7 4 4 2 ...
## $ Balance : chr [1:10000] "0" "83807.86" "159660.8" "0" ...
## $ NumOfProducts : num [1:10000] 1 1 3 2 1 2 2 4 2 1 ...
## $ HasCrCard : num [1:10000] 1 0 1 0 1 1 1 1 0 1 ...
## $ IsActiveMember : num [1:10000] 1 1 0 0 1 0 1 0 1 1 ...
## $ EstimatedSalary: chr [1:10000] "101348.88" "112542.58" "113931.57" "93826.63" ...
## $ Exited : num [1:10000] 1 0 1 0 0 1 0 1 0 0 ...
# Balance is read as text, so coerce it to numeric before imputing.
df[["Balance"]] <- as.numeric(df[["Balance"]])

# Numeric columns: fill missing entries with the mean of the observed values.
balance_mean <- mean(df[["Balance"]], na.rm = TRUE)
df[["Balance"]] <- replace(df[["Balance"]], is.na(df[["Balance"]]), balance_mean)
age_mean <- mean(df[["Age"]], na.rm = TRUE)
df[["Age"]] <- replace(df[["Age"]], is.na(df[["Age"]]), age_mean)

# Character columns: fill missing entries with the most frequent value
# (sorting the negated counts puts the largest count first).
gender_mode <- names(sort(-table(df[["Gender"]]))[1])
df[["Gender"]] <- replace(df[["Gender"]], is.na(df[["Gender"]]), gender_mode)
geography_mode <- names(sort(-table(df[["Geography"]]))[1])
df[["Geography"]] <- replace(
  df[["Geography"]], is.na(df[["Geography"]]), geography_mode
)

# Recount missing values per column to check the imputation.
colSums(is.na(df))
## RowNumber CustomerId Surname CreditScore Geography
## 0 0 0 0 0
## Gender Age Tenure Balance NumOfProducts
## 0 0 0 0 0
## HasCrCard IsActiveMember EstimatedSalary Exited
## 0 0 0 0
# Disabled cleanup step: if enabled it would remove every punctuation
# character (decimal points included) from all columns and, because gsub()
# returns character vectors, turn every column into text.
#df <- as.data.frame(lapply(df, function(x) gsub("[[:punct:]]", "",x)))
# Re-inspect the column types after the imputation above.
str(df)
## tibble [10,000 × 14] (S3: tbl_df/tbl/data.frame)
## $ RowNumber : num [1:10000] 1 2 3 4 5 6 7 8 9 10 ...
## $ CustomerId : num [1:10000] 15634602 15647311 15619304 15701354 15737888 ...
## $ Surname : chr [1:10000] "Hargrave" "Hill" "Onio" "Boni" ...
## $ CreditScore : num [1:10000] 619 608 502 699 850 645 822 376 501 684 ...
## $ Geography : chr [1:10000] "France" "France" "France" "France" ...
## $ Gender : chr [1:10000] "Female" "Female" "Female" "Female" ...
## $ Age : num [1:10000] 38.9 41 42 39 43 ...
## $ Tenure : num [1:10000] 2 1 8 1 2 8 7 4 4 2 ...
## $ Balance : num [1:10000] 0 83808 159661 0 125511 ...
## $ NumOfProducts : num [1:10000] 1 1 3 2 1 2 2 4 2 1 ...
## $ HasCrCard : num [1:10000] 1 0 1 0 1 1 1 1 0 1 ...
## $ IsActiveMember : num [1:10000] 1 1 0 0 1 0 1 0 1 1 ...
## $ EstimatedSalary: chr [1:10000] "101348.88" "112542.58" "113931.57" "93826.63" ...
## $ Exited : num [1:10000] 1 0 1 0 0 1 0 1 0 0 ...
library(plotly)
## Warning: package 'plotly' was built under R version 4.3.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.3.3
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Histogram of Age with a kernel density estimate drawn on top.
#
# density() integrates to 1, so it must be multiplied by
# (number of observations * bin width) to share the histogram's count scale.
# The bin width is therefore fixed explicitly instead of being left to
# plotly's automatic binning, where the required factor would be unknown.
age_bin_width <- 1
dens <- density(df$Age)
plot_ly(
  data = df,
  x = ~Age,
  type = "histogram",
  xbins = list(size = age_bin_width),
  name = "Count"
) %>%
  add_lines(
    # add_lines() already creates a scatter trace in "lines" mode, so no
    # type/mode is passed here. Plain vectors plus inherit = FALSE keep the
    # histogram-only attributes (xbins) from being copied onto the line trace.
    x = dens$x,
    y = dens$y * length(df$Age) * age_bin_width,
    name = "Density (scaled to counts)",
    inherit = FALSE
  )
# Bar chart of Age by Gender built from the raw rows; bars belonging to
# "Female" rows are red, every other row is grey.
df %>%
  plot_ly(
    x = ~Gender,
    y = ~Age,
    type = "bar",
    marker = list(color = ifelse(df$Gender == "Female", "red", "grey"))
  )
# Per-gender summary: mean Age and the sum of the Exited column.
df_group <- df %>%
  group_by(Gender) %>%
  summarise(
    Age = mean(Age),
    Exited = sum(Exited)
  )

# Two bar traces per gender: mean age and summed Exited.
# "Female" bars are red, all others grey.
plot_ly(
  data = df_group,
  x = ~Gender,
  y = ~Age,
  type = "bar",
  name = "Mean age",
  # A formula maps the label to the Gender column; a bare string would print
  # the same literal word on every bar.
  text = ~Gender,
  marker = list(color = ifelse(df_group$Gender == "Female", "red", "grey"))
) %>%
  add_trace(
    x = ~Gender,
    y = ~Exited,
    name = "Exited (sum)",
    marker = list(color = ifelse(df_group$Gender == "Female", "red", "grey"))
  )
# Line trace of NumOfProducts against Age; points are joined in row order.
df %>%
  plot_ly(x = ~Age, y = ~NumOfProducts, type = "scatter", mode = "lines")
# Scatter plot of Age against EstimatedSalary.
# EstimatedSalary is still a character column at this point (see the str(df)
# output above), which plotly would place on a categorical axis. It is
# converted to numeric here so the x axis is continuous. Values that cannot
# be parsed become NA (with a coercion warning) and are not drawn.
plot_ly(
  data = df,
  y = ~Age,
  x = ~as.numeric(EstimatedSalary),
  type = "scatter",
  mode = "markers"
) %>%
  layout(
    # Keep the column name as the axis title instead of the conversion call.
    xaxis = list(title = "EstimatedSalary")
  )
# Box plot of Age with the mean marked (boxmean); the y axis runs from 0 to
# ten units above the largest observed age.
df %>%
  plot_ly(y = ~Age, type = "box", boxmean = TRUE) %>%
  layout(yaxis = list(range = c(0, max(df$Age) + 10)))
library(readr)
## Warning: package 'readr' was built under R version 4.3.3
# Read ds_salaries.csv (relative to the working directory) and compute the
# mean of the `salary` column for each work_year.
dh <- read.csv("ds_salaries.csv")
df_new <- summarise(
  group_by(dh, work_year),
  mean_salary = mean(salary)
)
# Scatter plot of mean salary per work year, with the y axis fixed to
# 0-600000.
plot_ly(
  data = df_new,
  x = ~work_year,
  y = ~mean_salary,
  type = "scatter",
  # Scatter modes are "lines", "markers", "text" (or "+" combinations);
  # the plural form is required for a marker-only trace.
  mode = "markers"
) %>%
  layout(
    title = "Linear",
    xaxis = list(title = "Year"),
    yaxis = list(title = "Mean Salary", range = c(0, 600000))
  )
# Inspect the structure of the mpg dataset.
str(mpg)
## tibble [234 × 11] (S3: tbl_df/tbl/data.frame)
## $ manufacturer: chr [1:234] "audi" "audi" "audi" "audi" ...
## $ model : chr [1:234] "a4" "a4" "a4" "a4" ...
## $ displ : num [1:234] 1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
## $ year : int [1:234] 1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
## $ cyl : int [1:234] 4 4 4 4 6 6 6 4 4 4 ...
## $ trans : chr [1:234] "auto(l5)" "manual(m5)" "manual(m6)" "auto(av)" ...
## $ drv : chr [1:234] "f" "f" "f" "f" ...
## $ cty : int [1:234] 18 21 20 21 16 18 18 18 16 20 ...
## $ hwy : int [1:234] 29 29 31 30 26 26 27 26 25 28 ...
## $ fl : chr [1:234] "p" "p" "p" "p" ...
## $ class : chr [1:234] "compact" "compact" "compact" "compact" ...
# Stacked bar chart: number of vehicles per class, with each bar split by
# drive train (drv).
ggplot(data = mpg, mapping = aes(x = class, fill = drv)) +
  geom_bar(position = "stack")

# Preview the first 100 rows of mpg.
head(mpg,100)
## # A tibble: 100 × 11
## manufacturer model displ year cyl trans drv cty hwy fl class
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>
## 1 audi a4 1.8 1999 4 auto… f 18 29 p comp…
## 2 audi a4 1.8 1999 4 manu… f 21 29 p comp…
## 3 audi a4 2 2008 4 manu… f 20 31 p comp…
## 4 audi a4 2 2008 4 auto… f 21 30 p comp…
## 5 audi a4 2.8 1999 6 auto… f 16 26 p comp…
## 6 audi a4 2.8 1999 6 manu… f 18 26 p comp…
## 7 audi a4 3.1 2008 6 auto… f 18 27 p comp…
## 8 audi a4 quattro 1.8 1999 4 manu… 4 18 26 p comp…
## 9 audi a4 quattro 1.8 1999 4 auto… 4 16 25 p comp…
## 10 audi a4 quattro 2 2008 4 manu… 4 20 28 p comp…
## # ℹ 90 more rows
library(ggplot2)
data("mpg")
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.3.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Number of vehicles for every class/drv combination, ordered by class, then drv.
mpgg <- arrange(count(mpg, class, drv), class, drv)

# Interactive stacked bar chart of those counts: one bar per class, one
# colour per drv, with "class drv n" shown on hover.
mpgg %>%
  plot_ly(
    x = ~class,
    y = ~n,
    color = ~drv,
    type = "bar",
    hoverinfo = "text",
    text = ~paste(class, drv, n)
  ) %>%
  layout(barmode = "stack")
# Pairwise correlation matrix of Age, NumOfProducts and CreditScore.
cor(df[c("Age", "NumOfProducts", "CreditScore")])
## Age NumOfProducts CreditScore
## Age 1.000000000 -0.03062867 -0.004032286
## NumOfProducts -0.030628669 1.00000000 0.012237879
## CreditScore -0.004032286 0.01223788 1.000000000
# Use the built-in mtcars dataset.
data(mtcars)
# Build an interactive plotly scatter plot of hp against mpg.
# A scatter trace in "markers" mode is used because x/y positions and the
# marker size/opacity settings are scatter attributes; map trace types such
# as "choropleth" do not accept x and y.
plot <- plot_ly(
  data = mtcars,
  x = ~mpg,
  y = ~hp,
  type = "scatter",
  mode = "markers",
  marker = list(size = 10, opacity = 0.8)
)
# Display the plot.
plot
## Warning: 'choropleth' objects don't have these attributes: 'x', 'y'
## Valid attributes include:
## 'autocolorscale', 'coloraxis', 'colorbar', 'colorscale', 'customdata', 'customdatasrc', 'featureidkey', 'geo', 'geojson', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'ids', 'idssrc', 'legendgroup', 'legendgrouptitle', 'legendrank', 'locationmode', 'locations', 'locationssrc', 'marker', 'meta', 'metasrc', 'name', 'reversescale', 'selected', 'selectedpoints', 'showlegend', 'showscale', 'stream', 'text', 'textsrc', 'transforms', 'type', 'uid', 'uirevision', 'unselected', 'visible', 'z', 'zauto', 'zmax', 'zmid', 'zmin', 'zsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'