library(palmerpenguins)
##
## Attaching package: 'palmerpenguins'
## The following objects are masked from 'package:datasets':
##
## penguins, penguins_raw
# Load the dataset named "penguins" into the workspace.
data(list = "penguins")
# Research question: is there a relationship between species and island?
# Preview the first and last rows to see how the data are laid out.
head(penguins)
## # A tibble: 6 × 8
## species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
## <fct> <fct> <dbl> <dbl> <int> <int>
## 1 Adelie Torgersen 39.1 18.7 181 3750
## 2 Adelie Torgersen 39.5 17.4 186 3800
## 3 Adelie Torgersen 40.3 18 195 3250
## 4 Adelie Torgersen NA NA NA NA
## 5 Adelie Torgersen 36.7 19.3 193 3450
## 6 Adelie Torgersen 39.3 20.6 190 3650
## # ℹ 2 more variables: sex <fct>, year <int>
tail(penguins)
## # A tibble: 6 × 8
## species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
## <fct> <fct> <dbl> <dbl> <int> <int>
## 1 Chinstrap Dream 45.7 17 195 3650
## 2 Chinstrap Dream 55.8 19.8 207 4000
## 3 Chinstrap Dream 43.5 18.1 202 3400
## 4 Chinstrap Dream 49.6 18.2 193 3775
## 5 Chinstrap Dream 50.8 19 210 4100
## 6 Chinstrap Dream 50.2 18.7 198 3775
## # ℹ 2 more variables: sex <fct>, year <int>
# View() opens a GUI data viewer, so only call it when someone is at the
# console; this keeps Rscript / knitted runs from trying to open a window.
if (interactive()) {
  View(penguins)
}
# Column types and the levels of the species factor.
str(penguins)
## tibble [344 × 8] (S3: tbl_df/tbl/data.frame)
## $ species : Factor w/ 3 levels "Adelie","Chinstrap",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ island : Factor w/ 3 levels "Biscoe","Dream",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ bill_length_mm : num [1:344] 39.1 39.5 40.3 NA 36.7 39.3 38.9 39.2 34.1 42 ...
## $ bill_depth_mm : num [1:344] 18.7 17.4 18 NA 19.3 20.6 17.8 19.6 18.1 20.2 ...
## $ flipper_length_mm: int [1:344] 181 186 195 NA 193 190 181 195 193 190 ...
## $ body_mass_g : int [1:344] 3750 3800 3250 NA 3450 3650 3625 4675 3475 4250 ...
## $ sex : Factor w/ 2 levels "female","male": 2 1 1 NA 1 2 1 2 NA NA ...
## $ year : int [1:344] 2007 2007 2007 2007 2007 2007 2007 2007 2007 2007 ...
levels(penguins$species)
## [1] "Adelie" "Chinstrap" "Gentoo"
# Chi-squared test of independence between species and island.

# Keep only the two categorical variables under study.
penguin_df <- penguins[, c("species", "island")]

# Drop rows where either variable is missing. The cleaned frame gets its own
# name so the table below is built from it rather than from the raw data.
penguin_df_clean <- na.omit(penguin_df)

# Species-by-island contingency table of counts. The name `penguin_df_final`
# is kept for the table because later output refers to it.
penguin_df_final <- table(penguin_df_clean$species, penguin_df_clean$island)

# Pearson's chi-squared test on the contingency table; print the result.
chisq_test_result <- chisq.test(penguin_df_final)
chisq_test_result
##
## Pearson's Chi-squared test
##
## data: penguin_df_final
## X-squared = 299.55, df = 4, p-value < 2.2e-16
# The p-value is far below 0.05, so we reject the null hypothesis that species
# and island are independent: there is a relationship between species and island.
# Render the test's p-value as a plain decimal string (no exponent notation)
# and display it.
formatted_p_value <- format(
  chisq_test_result[["p.value"]],
  scientific = FALSE
)
print(formatted_p_value)
## [1] "0.000000000000000000000000000000000000000000000000000000000000001354574"