Import data

# Load the "myData" sheet of the Excel workbook, skipping the first row of the
# sheet. The first column has no header, so readxl renames it to `...1`.
data <- read_excel(
    path  = "../00_data/myData_charts.xlsx",
    sheet = "myData",
    skip  = 1
)
## New names:
## • `` -> `...1`
data
## # A tibble: 195 × 18
##     ...1 Breed   Affec…¹ Good …² Good …³ Shedd…⁴ Coat …⁵ Drool…⁶ Coat …⁷ Coat …⁸
##    <dbl> <chr>     <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <chr>   <chr>  
##  1   167 Plott …       0       0       0       0       0       0 Plott … Plott …
##  2     6 Poodles       5       5       3       1      NA       1 Curly   Long   
##  3    13 Yorksh…       5       5       3       1       5       1 Silky   Long   
##  4    20 Shih T…       5       5       5       1       4       1 Double  Long   
##  5    38 Maltese       5       3       3       1       4       1 Silky   Long   
##  6    45 Bichon…       5       5       5       1       5       1 Double  Long   
##  7    58 Soft C…       5       5       3       1       4       2 Wavy    Medium 
##  8    62 Aireda…       3       3       3       1       3       1 Wiry    Short  
##  9    78 Lhasa …       5       3       3       1       3       1 Silky   Long   
## 10    79 Chines…       4       3       3       1       2       1 Hairle… Short  
## # … with 185 more rows, 8 more variables: `Openness To Strangers` <dbl>,
## #   `Playfulness Level` <dbl>, `Watchdog/Protective Nature` <dbl>,
## #   `Adaptability Level` <dbl>, `Trainability Level` <dbl>,
## #   `Energy Level` <dbl>, `Barking Level` <dbl>,
## #   `Mental Stimulation Needs` <dbl>, and abbreviated variable names
## #   ¹​`Affectionate With Family`, ²​`Good With Young Children`,
## #   ³​`Good With Other Dogs`, ⁴​`Shedding Level`, ⁵​`Coat Grooming Frequency`, …
# Standardise the column names with janitor
# (e.g. `Shedding Level` becomes shedding_level, as used in the plots below).
data <- janitor::clean_names(data)

Introduction

Questions

Variation

Visualizing distributions

# Histogram of shedding level across all rows, using bins 0.5 wide.
ggplot(data, aes(x = shedding_level)) +
    geom_histogram(binwidth = 0.5)

Typical values

# Keep only the rows whose shedding level is above 2 (lower values are
# dropped), then draw a histogram of what remains using bins 0.4 wide.
ggplot(
    filter(data, shedding_level > 2),
    aes(x = shedding_level)
) +
    geom_histogram(binwidth = 0.4)

Unusual values

Missing Values

Covariation

# Plot shedding level against coat grooming frequency.
# The original call set the aesthetics but added no geom layer, so it drew an
# empty panel. geom_count() draws one point per (x, y) combination, sized by
# how many rows share it, so repeated value pairs do not hide each other.
data %>%
    ggplot(aes(x = shedding_level, y = coat_grooming_frequency)) +
    geom_count()

A categorical and continuous variable

# Heat map of how often each (shedding level, grooming frequency) pair occurs:
# count() tallies the rows per combination into `n`, which drives the tile fill.
ggplot(
    count(data, shedding_level, coat_grooming_frequency),
    aes(x = shedding_level, y = coat_grooming_frequency, fill = n)
) +
    geom_tile()
## Warning: Removed 1 rows containing missing values (geom_tile).

Two categorical variables

Two continuous variables

Patterns and models