ggplot_basics

Import Data

# Run this document in a fresh R session rather than clearing the workspace
# from code: wiping the global environment inside a script deletes whatever
# objects the person running it already had, and it does not reset loaded
# packages or options anyway.
# Print the working directory so relative paths can be checked.
getwd()
[1] "/Users/caitlinodriscoll/Desktop/BCE/R FILES/Day 5"
# Read the training data from CSV into the data frame `train`.
train <- read.csv(file = "~/Desktop/BCE/R FILES/Day 5/train (2).csv")

Installing and loading the package ggplot2

# install.packages("ggplot2")  # one-time install, left commented out
library(ggplot2)

# Base-graphics histogram of the Fare column, for comparison with ggplot2.
hist(train$Fare)

# Scatter plot of Fare (y) against Pclass (x), pink points, light theme.
ggplot(train, aes(x = Pclass, y = Fare)) +
  geom_point(colour = "pink") +
  theme_light()

# Open the help page for the theme used above.
help("theme_light")
# Quick check that dplyr is callable via `::` (kept commented out):
# dplyr::filter(iris, Sepal.Length > 7.5)


# Keep only the rows of `train` whose Age is greater than 16.
train_adults <- train |>
  dplyr::filter(Age > 16)


# Scatter plot of Fare (y) against Age (x) for every row of `train`.
# Rows that cannot be drawn (e.g. missing values) are dropped by
# geom_point() with a warning.
ggplot(train, aes(x = Age, y = Fare)) +
  geom_point(colour = "pink") +
  theme_light()
Warning: Removed 177 rows containing missing values or values outside the scale range
(`geom_point()`).

# Scatter plot of Fare (y) against Age (x) for rows with Age above 16 only.
#
# The pipe passes the filtered data frame to ggplot() as its first argument,
# which is `data`. ggplot() must therefore NOT also receive `data = train`:
# a named `data` wins, the piped frame falls through to `...`, and the
# unfiltered data is plotted instead. filter() also drops rows whose
# condition evaluates to NA, so rows with a missing Age are excluded here.
train |>
  dplyr::filter(Age > 16) |>
  ggplot(mapping = aes(x = Age, y = Fare)) +
  geom_point(colour = "pink") +
  theme_light()
Warning: Removed 177 rows containing missing values or values outside the scale range
(`geom_point()`).

Redo: filter the data before plotting

library(ggplot2)

# Earlier attempt, kept commented out. As first written it could not parse:
# the filter() call was never closed and there was no `+` before
# geom_point(). Both are repaired here so the lines run if uncommented.
# dplyr::filter(train, Age > 16 & Fare < 300) |>
#   ggplot(mapping = aes(y = Fare, x = Age)) +
#   geom_point(colour = "pink") +
#   theme_light()

# Same filtered scatter plot with a geom_smooth() layer instead of the theme
# (the missing `+` before geom_point() is repaired here as well).
# dplyr::filter(train, Age > 16 & Fare < 300) |>
#   ggplot(mapping = aes(y = Fare, x = Age)) +
#   geom_point() +
#   geom_smooth()
# install.packages("reshape2")  # one-time install, left commented out

# Reshape `train` from wide to long format. No id variables are supplied,
# so melt() picks them itself and reports its choice in a message. The
# result is used below through its `variable` and `value` columns.
library(ggplot2)

df_melted <- reshape2::melt(train)
Using Name, Sex, Ticket, Cabin, Embarked as id variables
# Histogram of the melted `value` column, one facet per `variable`.
ggplot(df_melted, aes(x = value)) +
  geom_histogram() +
  facet_wrap(facets = ~variable)
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning: Removed 177 rows containing non-finite outside the scale range
(`stat_bin()`).