Week 1

MONDAY July 24 2023

2. Install and load the tidyverse package

# Install tidyverse only when it is not already available, so that re-running
# this script does not download and reinstall the package every time
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}

# Load the package for this session
library(tidyverse)

3. Make a table with my demographic information

# One-row table holding my demographic information.
# age and height are written as numbers (no quotes) so they are stored as
# numeric columns and can be used in calculations such as mean() or median();
# the remaining fields are text.
df <- tibble(
  name = "Alice",
  age = 19,
  eyeColor = "purple",
  driver = "yes",
  height = 169,
  units = "cm",
  favoritePet = "bulbasaur"
)




WEDNESDAY July 26 2023

1. Clear environment and load packages

# Start from an empty workspace: remove every object left over from earlier
# work so that old data or values cannot leak into today's session
rm(list = ls(all.names = FALSE))

# Attach the packages needed today
library("tidyverse")
library("ggplot2")

2. Import the CSV file. `read.csv()` is the base R function; `read_csv()` is the tidyverse (readr) function, which reads the CSV in as a tibble automatically.

# Import the CSV with `read_csv()` (note the underscore) so that the data
# arrives as a tibble instead of a base data.frame
df <- read_csv(file = "20230726_h2l2c_demo_tables_together.csv")
## Rows: 13 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): name, eye_color, driver, height, units, fave_pet
## dbl (1): age
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Open the new data frame in the viewer (uncomment to run interactively)
# View(df)

3. Different ways to view data

# Pull out a single column by name with the dollar sign
df$name
##  [1] "Garima"  "Kalynn"  "Bhavyaa" "Yijun"   "Johanna" "Matthew" "Nyssa"  
##  [8] "Jenna"   "Meghan"  "Nikolas" "Cindy"   "Devin"   "Gabi"
df$units
##  [1] "cm"      "ft"      "ft"      "celsius" "in"      "cm"      "cm"     
##  [8] "cm"      "in"      "cm"      "cm"      "cm"      "cm"
df$eye_color
##  [1] "brown" "green" "black" "brown" "hazel" "blue"  "blue"  "hazel" "blue" 
## [10] "Brown" "brown" "Hazel" "Green"
# Square brackets index by position: df[row, column]
# Leave the row slot empty to keep every row
# Leave the column slot empty to keep every column

# First row, every column
df[1, ]
# Eighth row, every column
df[8, ]
# First column, every row
df[, 1]
# Fifth column, every row
df[, 5]
# The single cell at row 1, column 7
df[1, 7]

4. Try different descriptives to understand your df

# How many rows and how many columns does the data have?
nrow(df)
## [1] 13
ncol(df)
## [1] 7
# table() counts how often each distinct value occurs in a column,
# which is also a quick way to see WHICH categories are present
table(df[["age"]])
## 
## 21 23 24 25 27 28 29 32 
##  1  1  1  2  2  1  4  1
table(df[["driver"]])
## 
## yes Yes 
##  12   1
table(df[["fave_pet"]])
## 
##    cat    dog   none  Sally turtle 
##      2      8      1      1      1

Some quick descriptive statistics

# Average age across all students
mean(df$age)
## [1] 26.76923
# height was imported as text and the units column mixes several units, so a
# median over the raw column is not meaningful (on text it just returns the
# middle string after sorting). Keep only the rows recorded in cm and convert
# them to numbers before taking the median.
height_cm <- as.numeric(df$height[df$units == "cm"])
median(height_cm, na.rm = TRUE)

5. Visualize!

# Base R: draw a quick histogram of the age column with hist()

hist(df$age)

ggplot2: bar charts with extra custom styling

# Reusable text styling for the plots below: bold, angled x-axis labels, bold
# y-axis labels, and larger axis and plot titles. It is wrapped in a list so it
# can be added to any plot with `+ gghisto`.
gghisto <- list(
  theme(
    axis.text.x = element_text(
      face = "bold", size = 10, color = "Navyblue", angle = 30
    ),
    axis.text.y = element_text(face = "bold", size = 14),
    axis.title = element_text(size = 17),
    plot.title = element_text(size = 17, face = "bold")
  )
)

# Bar chart of how many students named each favorite pet
# (geom_bar() counts the rows in each fave_pet category)
ggplot(df, aes(x = fave_pet)) +
  geom_bar(
    lwd = .8,
    fill = alpha("hotpink3", 0.8),
    colour = alpha("black", 0.7)
  ) +
  theme(
    panel.background = element_rect(fill = "lightblue"),
    legend.position = "none"
  ) +
  ggtitle("Histogram of favorite pet frequencies") +
  ylab("Frequency") +
  xlab("Pet") +
  gghisto

# Bar chart with one bar per student, where bar height is the recorded height.
# height was imported as text, so it is parsed to a number first; otherwise the
# y axis would be treated as a set of categories instead of a numeric scale.
# NOTE: the units column mixes several units, so bars are not directly
# comparable across students until the heights are converted to one unit.
ggplot(data = df, aes(name, readr::parse_number(height), fill = name)) +
  geom_col(lwd = .8, colour = alpha("black", 0.7)) +
  theme(
    panel.background = element_rect(fill = "lightblue"),
    legend.position = "none"
  ) +
  ggtitle("Histogram of Names and Heights") +
  ylab("Height in mystery units") +
  xlab("Student Name") +
  gghisto