Week 4

Author

Yasiru Dilshan

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(palmerpenguins)
# Load the penguins data from palmerpenguins explicitly: modeldata (attached
# later in this document) ships a dataset with the same name.
data("penguins", package = "palmerpenguins")
# Print the tibble to inspect its columns and first rows.
penguins
# A tibble: 344 × 8
   species island    bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
   <fct>   <fct>              <dbl>         <dbl>             <int>       <int>
 1 Adelie  Torgersen           39.1          18.7               181        3750
 2 Adelie  Torgersen           39.5          17.4               186        3800
 3 Adelie  Torgersen           40.3          18                 195        3250
 4 Adelie  Torgersen           NA            NA                  NA          NA
 5 Adelie  Torgersen           36.7          19.3               193        3450
 6 Adelie  Torgersen           39.3          20.6               190        3650
 7 Adelie  Torgersen           38.9          17.8               181        3625
 8 Adelie  Torgersen           39.2          19.6               195        4675
 9 Adelie  Torgersen           34.1          18.1               193        3475
10 Adelie  Torgersen           42            20.2               190        4250
# ℹ 334 more rows
# ℹ 2 more variables: sex <fct>, year <int>
# Density of bill length, one curve per species.
# group_by() is dropped: ggplot2 ignores dplyr grouping, and the colour/fill
# mapping already splits the data by species. Semi-transparent fills keep
# overlapping curves visible.
penguins %>%
  ggplot(aes(x = bill_length_mm, colour = species, fill = species)) +
  geom_density(alpha = 0.5)
Warning: Removed 2 rows containing non-finite outside the scale range
(`stat_density()`).

# NOTE(review): penguins was already loaded and printed earlier in this
# document; this reload/reprint looks redundant - confirm before removing.
data("penguins")
penguins
# A tibble: 344 × 8
   species island    bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
   <fct>   <fct>              <dbl>         <dbl>             <int>       <int>
 1 Adelie  Torgersen           39.1          18.7               181        3750
 2 Adelie  Torgersen           39.5          17.4               186        3800
 3 Adelie  Torgersen           40.3          18                 195        3250
 4 Adelie  Torgersen           NA            NA                  NA          NA
 5 Adelie  Torgersen           36.7          19.3               193        3450
 6 Adelie  Torgersen           39.3          20.6               190        3650
 7 Adelie  Torgersen           38.9          17.8               181        3625
 8 Adelie  Torgersen           39.2          19.6               195        4675
 9 Adelie  Torgersen           34.1          18.1               193        3475
10 Adelie  Torgersen           42            20.2               190        4250
# ℹ 334 more rows
# ℹ 2 more variables: sex <fct>, year <int>
# Histogram of bill length, filled by species (bars are stacked).
# group_by() is dropped because ggplot2 ignores dplyr grouping; an explicit
# binwidth (1 mm) replaces the default of 30 bins that stat_bin() complains
# about.
penguins %>%
  ggplot(aes(x = bill_length_mm, colour = species, fill = species)) +
  geom_histogram(binwidth = 1)
Warning: Removed 2 rows containing non-finite outside the scale range
(`stat_bin()`).

# NOTE(review): penguins was already loaded and printed earlier in this
# document; this reload/reprint looks redundant - confirm before removing.
data("penguins")
penguins
# A tibble: 344 × 8
   species island    bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
   <fct>   <fct>              <dbl>         <dbl>             <int>       <int>
 1 Adelie  Torgersen           39.1          18.7               181        3750
 2 Adelie  Torgersen           39.5          17.4               186        3800
 3 Adelie  Torgersen           40.3          18                 195        3250
 4 Adelie  Torgersen           NA            NA                  NA          NA
 5 Adelie  Torgersen           36.7          19.3               193        3450
 6 Adelie  Torgersen           39.3          20.6               190        3650
 7 Adelie  Torgersen           38.9          17.8               181        3625
 8 Adelie  Torgersen           39.2          19.6               195        4675
 9 Adelie  Torgersen           34.1          18.1               193        3475
10 Adelie  Torgersen           42            20.2               190        4250
# ℹ 334 more rows
# ℹ 2 more variables: sex <fct>, year <int>
# Horizontal boxplots of bill length, one per species.
# group_by() is dropped: ggplot2 ignores dplyr grouping, and the colour/fill
# mapping already separates the species.
penguins %>%
  ggplot(aes(x = bill_length_mm, colour = species, fill = species)) +
  geom_boxplot(alpha = 0.5) +
  theme(
    axis.text = element_text(size = 16),
    axis.title = element_text(size = 16)
  )
Warning: Removed 2 rows containing non-finite outside the scale range
(`stat_boxplot()`).

# Number of penguins per species as a bar chart.
ggplot(penguins, aes(x = species, colour = species, fill = species)) +
  geom_bar(alpha = 0.5) +
  theme(
    axis.title = element_text(size = 16),
    axis.text = element_text(size = 16)
  )

# Penguins counted per year; bars are stacked by species (geom_bar default).
ggplot(penguins, aes(x = year, colour = species, fill = species)) +
  geom_bar() +
  theme(
    axis.title = element_text(size = 16),
    axis.text = element_text(size = 16)
  )

# Same yearly counts, with the species bars placed side by side.
ggplot(penguins, aes(x = year, colour = species, fill = species)) +
  geom_bar(position = "dodge") +
  theme(
    axis.title = element_text(size = 16),
    axis.text = element_text(size = 16)
  )

# Penguins counted per island; bars are stacked by species.
ggplot(penguins, aes(x = island, colour = species, fill = species)) +
  geom_bar() +
  theme(
    axis.title = element_text(size = 16),
    axis.text = element_text(size = 16)
  )

# Bill depth against bill length with a single linear fit across all penguins.
ggplot(penguins, aes(x = bill_length_mm, y = bill_depth_mm)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme(
    axis.title = element_text(size = 16),
    axis.text = element_text(size = 16)
  )
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 2 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_point()`).

# Bill depth against bill length, with a separate linear fit per species
# (no confidence band).
ggplot(
  penguins,
  aes(
    x = bill_length_mm,
    y = bill_depth_mm,
    colour = species,
    fill = species
  )
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  theme(
    axis.title = element_text(size = 16),
    axis.text = element_text(size = 16)
  )
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 2 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_point()`).

# Body mass by sex, split by species. Rows with any missing value are
# dropped first so no NA category appears on the sex axis.
ggplot(
  na.omit(penguins),
  aes(x = sex, y = body_mass_g, colour = species, fill = species)
) +
  geom_boxplot(alpha = 0.7) +
  theme(
    axis.title = element_text(size = 16),
    axis.text = element_text(size = 16)
  )

# Body mass by species, split by sex. Rows with any missing value are
# dropped before plotting.
ggplot(
  na.omit(penguins),
  aes(x = species, y = body_mass_g, colour = sex, fill = sex)
) +
  geom_boxplot(alpha = 0.7) +
  theme(
    axis.title = element_text(size = 16),
    axis.text = element_text(size = 16)
  )

Question 1

Can body mass predict bill length?

How do we find the answer?

We need to find the relationship between body mass and bill length.

A scatter plot with a linear regression line for each species will help to do this.

library(ggplot2)
# Question 1: bill length against body mass, with one linear fit per species
# (no confidence band). Rows with any missing value are dropped first.
# Fixes the "Specues" typo in the title and the stray trailing comma in aes().
penguins %>%
  na.omit() %>%
  ggplot(aes(x = body_mass_g, y = bill_length_mm, color = species)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  theme(
    axis.text = element_text(size = 16),
    axis.title = element_text(size = 16)
  ) +
  labs(title = "Body mass vs bill length by Species")
`geom_smooth()` using formula = 'y ~ x'

Discussion

The trend between body mass and bill length within each species is shown by the linear regression lines for that species.

Conclusion

Body mass can be used to predict bill length, according to the scatterplot with regression lines; the strength of this association varies slightly between species. There may be some variance around the trend lines, though, so this relationship may not be entirely linear.

Question 2

Does sex explain flipper length?

To answer this, we need to find out whether there is a significant difference in flipper length between male and female penguins.

library(ggplot2)
# Question 2: flipper length by sex, with one box per species within each
# sex. Rows with any missing value are dropped before plotting.
ggplot(
  na.omit(penguins),
  aes(x = sex, y = flipper_length_mm, fill = species)
) +
  geom_boxplot() +
  theme(
    axis.title = element_text(size = 16),
    axis.text = element_text(size = 16)
  ) +
  labs(title = "Flipper length by sex and species")

Discussion

Conclusion

Gentoo males have the longest flippers, with a median flipper length around 220 mm, while Adelie females have the shortest median flipper length, just below 190 mm.

According to the figure, flipper length is strongly influenced by both sex and species. Gentoo penguins often have the longest flippers regardless of sex, and males typically have longer flippers than females.

Hello Week 4 Assessment

library(tidyverse)
library(modeldata)

Attaching package: 'modeldata'
The following object is masked _by_ '.GlobalEnv':

    penguins
The following object is masked from 'package:palmerpenguins':

    penguins
# Open the help page for the crickets dataset.
?crickets
starting httpd help server ...
 done
# Open the data viewer only in interactive sessions; View() has no useful
# effect (and can fail) when the document is rendered non-interactively.
if (interactive()) {
  View(crickets)
}

# Chirp rate against temperature, coloured by species with the Dark2 palette.
ggplot(data = crickets, mapping = aes(temp, rate, color = species)) +
  geom_point() +
  labs(
    title = "Cricket chirps",
    caption = "Source: McDonald (2009)",
    x = "Temperature",
    y = "Chirp rate",
    color = "Species"
  ) +
  scale_color_brewer(palette = "Dark2")

# Modifying basic properties of the plot
# Constant values passed to geom_point() apply to every point, so the fixed
# red colour takes precedence over the species colour mapping.
ggplot(data = crickets, mapping = aes(temp, rate, color = species)) +
  geom_point(color = "red", size = 2, alpha = .3, shape = "square") +
  labs(
    title = "Cricket chirps",
    caption = "Source: McDonald (2009)",
    x = "Temperature",
    y = "Chirp rate",
    color = "Species"
  )

# See ?geom_point for the full list of options the geom accepts.
# Adding another layer: a single linear fit (no confidence band) drawn over
# the points.
ggplot(data = crickets, mapping = aes(temp, rate)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Cricket chirps",
    caption = "Source: McDonald (2009)",
    x = "Temperature",
    y = "Chirp rate"
  )
`geom_smooth()` using formula = 'y ~ x'

# Chirp rate against temperature with one linear fit per species
# (no confidence band), using the Dark2 palette.
ggplot(data = crickets, mapping = aes(temp, rate, color = species)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Cricket chirps",
    caption = "Source: McDonald (2009)",
    x = "Temperature",
    y = "Chirp rate",
    color = "Species"
  ) +
  scale_color_brewer(palette = "Dark2")
`geom_smooth()` using formula = 'y ~ x'

# Other plots

# Distribution of chirp rate as a 15-bin histogram.
ggplot(data = crickets, mapping = aes(rate)) +
  geom_histogram(bins = 15)

# The same 15-bin distribution of chirp rate drawn as a frequency polygon.
ggplot(data = crickets, mapping = aes(rate)) +
  geom_freqpoly(bins = 15)

# Number of observations per species, with a fixed outline and fill colour.
ggplot(data = crickets, mapping = aes(species)) +
  geom_bar(fill = "lightblue", color = "black")

# Number of observations per species, filled by species; the legend is hidden
# because the x axis already names each species.
ggplot(data = crickets, mapping = aes(species, fill = species)) +
  geom_bar(show.legend = FALSE) +
  scale_fill_brewer(palette = "Dark2")

# Chirp rate per species as boxplots, coloured by species, without a legend.
ggplot(data = crickets, mapping = aes(species, rate, color = species)) +
  geom_boxplot(show.legend = FALSE) +
  scale_color_brewer(palette = "Dark2") +
  theme_minimal()

# Faceting
# Not great: both species are stacked in a single panel, which makes the two
# distributions hard to compare.
# `bins` is the correct argument name; the previous `bin = 15` was ignored
# with a warning, so the histogram silently fell back to 30 bins.
ggplot(crickets, aes(x = rate, fill = species)) +
  geom_histogram(bins = 15) +
  scale_fill_brewer(palette = "Dark2")

# One 15-bin histogram panel per species; the legend is hidden because the
# facet strips already name each species.
ggplot(data = crickets, mapping = aes(rate, fill = species)) +
  geom_histogram(bins = 15, show.legend = FALSE) +
  facet_wrap(~species) +
  scale_fill_brewer(palette = "Dark2")

#Done and Dusted