library(skimr)
library(visdat)
library(tidyverse)
library(plotly)
library(knitr)
library(glancedata)
library(faraway)
# Load the teengamb data set (the faraway package is attached above).
data("teengamb")

# Preview the first rows as a formatted table with capitalised headers.
# `col.names` is an argument of kable(), not head(): passed to head() it is
# silently ignored and the table keeps the raw lowercase column names.
kable(
  head(teengamb),
  col.names = c("Sex", "Status", "Income", "Verbal", "Gamble")
)
| 1 |
51 |
2.00 |
8 |
0.0 |
| 1 |
28 |
2.50 |
8 |
0.0 |
| 1 |
37 |
2.00 |
6 |
0.0 |
| 1 |
28 |
7.00 |
4 |
7.3 |
| 1 |
65 |
2.00 |
8 |
19.6 |
| 1 |
61 |
3.47 |
6 |
0.1 |
# Per-column summary of teengamb (missing counts, mean, sd, quantiles and an
# inline histogram for each numeric variable).
skimr::skim(teengamb)
Data summary
| Name |
teengamb |
| Number of rows |
47 |
| Number of columns |
5 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
5 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| sex |
0 |
1 |
0.40 |
0.50 |
0.0 |
0.0 |
0.00 |
1.00 |
1 |
▇▁▁▁▆ |
| status |
0 |
1 |
45.23 |
17.26 |
18.0 |
28.0 |
43.00 |
61.50 |
75 |
▇▅▇▃▆ |
| income |
0 |
1 |
4.64 |
3.55 |
0.6 |
2.0 |
3.25 |
6.21 |
15 |
▇▃▂▂▁ |
| verbal |
0 |
1 |
6.66 |
1.86 |
1.0 |
6.0 |
7.00 |
8.00 |
10 |
▁▂▆▇▃ |
| gamble |
0 |
1 |
19.30 |
31.52 |
0.0 |
1.1 |
6.00 |
19.40 |
156 |
▇▁▁▁▁ |
# Quick structural overview of teengamb: per-column type, number of distinct
# values, min/median/max, mean, sd and NA proportion.
glancedata::glance_data(teengamb)
# Same overview broken down by kind of column; the printed result is a list
# of tibbles ($all, $summary, $all_nas, $single_value, $binary, $numerical,
# $categorical).
# NOTE(review): the function name suggests it also writes a workbook file as
# a side effect -- confirm against the glancedata documentation.
glancedata::glance_data_in_workbook(teengamb)
## $all
## # A tibble: 5 x 11
## name type distinct_values minimum median maximum mean sd na_proportion
## <chr> <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 sex nume… 2 0 0 1 0.404 0.496 0
## 2 stat… nume… 16 18 43 75 45.2 17.3 0
## 3 inco… nume… 26 0.6 3.25 15 4.64 3.55 0
## 4 verb… nume… 9 1 7 10 6.66 1.86 0
## 5 gamb… nume… 36 0 6 156 19.3 31.5 0
## # … with 2 more variables: count <chr>, sample_values <chr>
##
## $summary
## # A tibble: 2 x 2
## cat n
## <chr> <int>
## 1 binary 1
## 2 numerical 4
##
## $all_nas
## # A tibble: 0 x 6
## # … with 6 variables: name <chr>, type <chr>, distinct_values <int>,
## # na_proportion <dbl>, count <chr>, sample_values <chr>
##
## $single_value
## # A tibble: 0 x 11
## # … with 11 variables: name <chr>, type <chr>, distinct_values <int>,
## # minimum <dbl>, median <dbl>, maximum <dbl>, mean <dbl>, sd <dbl>,
## # na_proportion <dbl>, count <chr>, sample_values <chr>
##
## $binary
## # A tibble: 1 x 11
## name type distinct_values minimum median maximum mean sd na_proportion
## <chr> <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 sex nume… 2 0 0 1 0.404 0.496 0
## # … with 2 more variables: count <chr>, sample_values <chr>
##
## $numerical
## # A tibble: 4 x 10
## name type distinct_values minimum median maximum mean sd na_proportion
## <chr> <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 stat… nume… 16 18 43 75 45.2 17.3 0
## 2 inco… nume… 26 0.6 3.25 15 4.64 3.55 0
## 3 verb… nume… 9 1 7 10 6.66 1.86 0
## 4 gamb… nume… 36 0 6 156 19.3 31.5 0
## # … with 1 more variable: sample_values <chr>
##
## $categorical
## # A tibble: 0 x 5
## # … with 5 variables: name <chr>, distinct_values <int>, na_proportion <dbl>,
## # count <chr>, sample_values <chr>
# Plot the variables glancedata treats as discrete; in the rendered output
# below this produces panels for `sex` and `verbal`.
glancedata::plot_discrete_vars(teengamb)
## TableGrob (2 x 2) "arrange": 3 grobs
## z cells name grob
## sex 1 (1-1,1-1) arrange gtable[layout]
## verbal 2 (1-1,2-2) arrange gtable[layout]
## 3 (2-2,1-2) arrange text[GRID.text.89]
# Look at the numerical variables from several angles, one plot type per call.

# Distribution shape.
glancedata::plot_numerical_vars(teengamb, plot_type = "histogram")
glancedata::plot_numerical_vars(teengamb, plot_type = "density")

# Spread and potential outliers.
glancedata::plot_numerical_vars(teengamb, plot_type = "boxplot")

# Quantile-quantile view of each variable.
glancedata::plot_numerical_vars(teengamb, plot_type = "qqplot")

# Relationships between pairs of variables.
glancedata::plot_numerical_vars(teengamb, plot_type = "pairwise")

# Work on a copy so the original teengamb data stays intact for later steps.
teen <- teengamb

# Blank out only the values beyond each cutoff so they show up as missing.
# The previous form `teen$x = teen$x[cond] <- NA` parsed as
# `teen$x = (teen$x[cond] <- NA)`, which overwrote the ENTIRE column with NA.
# NOTE(review): the skim summary earlier in this file reports a maximum income
# of 15, so the `> 70` cutoff flags nothing -- confirm the intended threshold.
teen$income[teen$income > 70] <- NA
teen$gamble[teen$gamble > 30] <- NA
teen$verbal[teen$verbal < 3] <- NA

# Visualise the resulting missingness pattern per column.
visdat::vis_miss(teen, sort_miss = TRUE, show_perc = TRUE, cluster = TRUE)

# Recode the 0/1 `sex` indicator as a labelled factor. Labels are assigned in
# level order, so the first level (0) becomes "Male" and the second (1)
# becomes "Female".
teengamb$sex <- as.factor(teengamb$sex)
levels(teengamb$sex) <- c("Male", "Female")

# Scatter plot of income against status, coloured by sex, then converted to an
# interactive plotly widget.
p <- ggplot(
  data = teengamb,
  mapping = aes(x = status, y = income, colour = sex)
) +
  geom_point()
ggplotly(p)