library(ggplot2)
library(readr)
# Load the state-level data set used by the plots that follow.
# NOTE(review): the path is relative, so it only resolves when the working
# directory is the one containing "Documents/" -- confirm before running.
USStates <- read_csv("Documents/R code for Stats/USStates.csv")
## Rows: 50 Columns: 22
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (3): State, Region, Elect2016
## dbl (19): HouseholdIncome, Population, EighthGradeMath, HighSchool, College,...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# View() opens an interactive data viewer, so only call it when a person is
# at the console; scripted or knitted runs skip it.
if (interactive()) {
  View(USStates)
}

1.

# Histogram of student spending across states, with the median marked.
# na.rm = TRUE keeps the median defined even if a spending value is missing.
median_ <- median(USStates$StudentSpending, na.rm = TRUE)
# Build the label from the computed median (rounded to 4 decimals) so the
# text cannot drift away from the data.
median_label <- paste0("Median = ", round(median_, 4))

ggplot(data = USStates, mapping = aes(x = StudentSpending)) +
  geom_histogram(bins = 10, color = "purple") +
  # The intercept is a constant, so pass it as a parameter instead of via
  # aes(). `linewidth` replaces `size` for lines (deprecated in ggplot2 3.4.0).
  geom_vline(xintercept = median_, color = "red", linewidth = 2) +
  # annotate() draws the label once; geom_text() with constant aesthetics
  # repeated it for every row of the data.
  annotate("text", x = 11.2, y = 10, label = median_label, color = "yellow") +
  labs(
    title = "Student Spending in the US States",
    x = "Amount of Money Spent",
    y = "Number of States"
  )

2.

# Bar chart counting states by their Elect2016 value.
# The outline colours are supplied as a two-element vector, one per bar;
# this assumes Elect2016 has exactly two distinct values -- TODO confirm.
ggplot(USStates, aes(x = Elect2016)) +
  geom_bar(colour = c("blue", "red")) +
  labs(
    x = "Votes in 2016 Election",
    y = "Number of States",
    title = "Number of States voting Democrat or Republican"
  )

3.

# Box plots of HouseholdIncome, one box per Elect2016 value.
ggplot(USStates, aes(x = Elect2016, y = HouseholdIncome)) +
  geom_boxplot(mapping = aes(group = Elect2016)) +
  labs(
    x = "Vote for 2016 Election",
    y = "Median Household Incomes (Thousands)",
    title = "2016 Vote with Median Household Incomes"
  )

4.

# Scatter plot of Obese against PhysicalActivity, one point per row.
ggplot(data = USStates, mapping = aes(x = PhysicalActivity, y = Obese)) +
  geom_point(color = "orange") +
  labs(
    title = "Obesity against Physical Activity",
    x = "Physical Activity",
    y = "Obesity" # fixed typo: was "Obestity"
  )

5.

# Same Obese vs PhysicalActivity scatter, with points coloured by Region.
ggplot(
  data = USStates,
  mapping = aes(x = PhysicalActivity, y = Obese, color = Region)
) +
  geom_point() +
  labs(
    # fixed typos: was "Physcial" in the title and "Obesitity" on the y axis
    title = "Obesity against Physical Activity for Different Regions",
    x = "Physical Activity",
    y = "Obesity"
  )

6.

library(readr)
# Load the yearly inflation data used by the trend plot that follows.
# NOTE(review): the path is relative, so it only resolves when the working
# directory is the one containing "Downloads/" -- confirm before running.
Inflation <- read_csv("Downloads/Inflation.csv")
## Rows: 60 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (3): Year, InflationRate, AnnualChange
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# View() opens an interactive data viewer, so only call it when a person is
# at the console; scripted or knitted runs skip it.
if (interactive()) {
  View(Inflation)
}
# Inflation rate by year: raw points plus a smoothed trend line.
ggplot(data = Inflation, mapping = aes(x = Year, y = InflationRate)) +
  geom_point(size = 2, color = "lightpink") +
  # Spell out the smoother that geom_smooth() previously picked by default
  # (loess, y ~ x) so the fit is explicit and no selection message is printed.
  geom_smooth(method = "loess", formula = y ~ x, color = "purple") +
  labs(
    title = "Inflation Rate from 1960 to 2020",
    x = "Year",
    y = "Inflation Rate"
  )

7.

library(Lahman)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# View() opens an interactive data viewer, so only call it when a person is
# at the console; scripted or knitted runs skip it.
if (interactive()) {
  View(Batting)
}

# Keep only the Batting rows for playerID "ortizda01" and add BA = H / AB.
# NOTE(review): BA is NaN for any row where AB is 0 -- confirm none exist.
Ortiz <- Batting %>%
  filter(playerID == "ortizda01") %>%
  mutate(BA = H / AB)

# Batting average (BA) by year for the Ortiz rows: one black line through all
# years, points coloured by team, and each point labelled with H over AB.
ggplot(data = Ortiz, mapping = aes(x = yearID, y = BA, color = teamID)) +
  # group = 1 draws a single line across all rows instead of one per team.
  # `linewidth` replaces `size` for lines (deprecated in ggplot2 3.4.0).
  geom_line(aes(group = 1), color = "black", linewidth = 0.4) +
  geom_point(size = 1) +
  # Explicit breaks pin each legend label to its team code instead of
  # relying on the labels happening to be listed in the scale's order.
  scale_color_manual(
    name = "Team",
    breaks = c("BOS", "MIN"),
    values = c("BOS" = "red", "MIN" = "navy"),
    labels = c("Boston", "Minnesota")
  ) +
  labs(
    title = "David Ortiz's Batting Average Over \n the Course of his Career",
    x = "Year",
    y = "Batting Average"
  ) +
  theme(plot.title = element_text(hjust = 0.5)) +
  # Two-line label above each point: H on the first line, AB on the second.
  geom_text(
    aes(label = paste0(H, "\n", AB)),
    vjust = -0.5,
    size = 2,
    color = "black"
  ) +
  ylim(0, 0.4) +
  scale_x_continuous(
    breaks = seq(1996, 2016, by = 4),
    limits = c(1996, 2016)
  )