library("tidyverse") #I will use tidyverse to visualize this data
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.4     v dplyr   1.0.7
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   2.0.1     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Load the health data set from disk and keep it as `health`
health <- read_csv(file = "health.csv")
## Rows: 50 Columns: 18
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr  (8): state, state_abbr, region, gov_party, sen_party, house_party, leg_...
## dbl (10): percent_favorable_aca, percent_supporting_expansion, obama_share_1...
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# glimpse() must be given the data frame object itself. Passing the quoted
# name "health" only inspects a length-one character vector, which is why the
# output used to read `chr "health"` instead of listing the 18 columns.
glimpse(health) # quick peek at every column, its type, and its first values
# Column-by-column summary: quartiles/mean for numeric columns,
# length/class/mode for character columns
summary(object = health)
##     state            state_abbr           region           gov_party        
##  Length:50          Length:50          Length:50          Length:50         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##   sen_party         house_party        percent_favorable_aca
##  Length:50          Length:50          Min.   :24.85        
##  Class :character   Class :character   1st Qu.:37.86        
##  Mode  :character   Mode  :character   Median :45.49        
##                                        Mean   :44.44        
##                                        3rd Qu.:50.63        
##                                        Max.   :61.97        
##                                                             
##  percent_supporting_expansion obama_share_12     ideology       
##  Min.   :37.21                Min.   :25.37   Min.   :-0.39074  
##  1st Qu.:47.20                1st Qu.:40.90   1st Qu.:-0.09172  
##  Median :51.56                Median :50.98   Median : 0.06014  
##  Mean   :50.87                Mean   :49.15   Mean   : 0.03514  
##  3rd Qu.:55.75                3rd Qu.:57.29   3rd Qu.: 0.18764  
##  Max.   :64.66                Max.   :71.70   Max.   : 0.33290  
##                                                                 
##  percent_uninsured infant_mortality_rate cancer_incidence
##  Min.   : 4.00     Min.   : 4.800        Min.   :387.1   
##  1st Qu.:12.00     1st Qu.: 5.800        1st Qu.:439.5   
##  Median :14.00     Median : 6.600        Median :462.6   
##  Mean   :14.36     Mean   : 6.688        Mean   :461.1   
##  3rd Qu.:17.00     3rd Qu.: 7.600        3rd Qu.:480.8   
##  Max.   :24.00     Max.   :10.000        Max.   :509.1   
##                    NA's   :1             NA's   :1       
##  heart_disease_death_rate life_expectancy  leg_party        
##  Min.   :119.4            Min.   :75.00   Length:50         
##  1st Qu.:153.8            1st Qu.:77.80   Class :character  
##  Median :169.2            Median :78.90   Mode  :character  
##  Mean   :175.9            Mean   :78.66                     
##  3rd Qu.:192.6            3rd Qu.:79.90                     
##  Max.   :251.1            Max.   :81.30                     
##                                                             
##   health_score        health_score_cat  
##  Min.   :-2.4764700   Length:50         
##  1st Qu.:-0.5890758   Class :character  
##  Median : 0.1064754   Mode  :character  
##  Mean   :-0.0000002                     
##  3rd Qu.: 0.7357310                     
##  Max.   : 1.5685100                     
## 
library("skimr") #this opens the package needed to skim the data
# skim() gives one row per variable, grouped by column type
skim(data = health)
Data summary
Name health
Number of rows 50
Number of columns 18
_______________________
Column type frequency:
character 8
numeric 10
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
state 0 1 4 14 0 50 0
state_abbr 0 1 2 2 0 50 0
region 0 1 4 14 0 50 0
gov_party 0 1 10 11 0 3 0
sen_party 0 1 4 10 0 3 0
house_party 0 1 10 10 0 2 0
leg_party 0 1 7 18 0 3 0
health_score_cat 0 1 11 14 0 3 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
percent_favorable_aca 0 1.00 44.44 8.72 24.85 37.86 45.49 50.63 61.97 ▂▅▇▅▃
percent_supporting_expansion 0 1.00 50.87 6.75 37.21 47.20 51.56 55.75 64.66 ▅▅▆▇▂
obama_share_12 0 1.00 49.15 10.47 25.37 40.90 50.98 57.29 71.70 ▂▇▇▇▃
ideology 0 1.00 0.04 0.18 -0.39 -0.09 0.06 0.19 0.33 ▂▅▆▇▇
percent_uninsured 0 1.00 14.36 4.10 4.00 12.00 14.00 17.00 24.00 ▂▅▇▅▂
infant_mortality_rate 1 0.98 6.69 1.20 4.80 5.80 6.60 7.60 10.00 ▆▇▇▁▂
cancer_incidence 1 0.98 461.08 29.02 387.10 439.50 462.60 480.80 509.10 ▁▃▅▇▅
heart_disease_death_rate 0 1.00 175.86 29.67 119.40 153.75 169.15 192.55 251.10 ▂▇▅▂▂
life_expectancy 0 1.00 78.66 1.66 75.00 77.80 78.90 79.90 81.30 ▃▂▆▇▅
health_score 0 1.00 0.00 1.00 -2.48 -0.59 0.11 0.74 1.57 ▂▂▅▇▅

skim() and summary() are very similar, but skim() breaks the data down into an individual row for each variable, while summary() condenses the data into more general numbers.

# Scatter plot of Obama vote share (x) against ACA favorability (y).
# The point size is a fixed value, so it is set on the geom instead of inside
# aes(): a constant mapped in aes() is treated as data, gets rescaled by the
# size scale, and adds a meaningless "8.5" legend to the plot.
# NOTE(review): 8.5 is now applied literally, so points are drawn larger than
# before; lower it if they overlap too much.
ggplot(health, aes(x = obama_share_12, y = percent_favorable_aca)) +
  geom_point(size = 8.5) +
  labs(
    x = "Obama vote percentage",
    y = "ACA favorability percentage",
    title = "Obama voters compared to ACA support"
  ) +
  theme_bw()

#This code creates a graph that helps visualize the relationship between the percentage of people who voted for Obama and the percentage who are in favor of the Affordable Care Act. labs() renames the x and y axes and sets the title to lend clarity, and the theme makes the plot easier to view.
# Labelled scatter plot of state ideology score (x) against ACA favorability (y).
# The label comes from the data set's own `state_abbr` column so every label is
# tied to its row. The previous `state.abb` is base R's built-in vector of
# abbreviations, which is matched purely by position and mislabels states
# whenever the rows of `health` are not in that exact order.
ggplot(
  health,
  aes(x = ideology, y = percent_favorable_aca, label = state_abbr)
) +
  geom_label() +
  labs(
    x = "ideology",
    y = "ACA favorability percentage",
    title = "Ideology compared to ACA support"
  ) +
  theme_classic()

#This graph uses the health data set to compare states' ideology scores with the percentage of people who found Obamacare favorable. geom_label() specifies that, while it is a scatter plot, it marks the chart with state labels rather than points. The labs() function sets the x-axis, y-axis, and title text. The theme makes it easier to view.

I found it interesting how closely the ideology score aligns with the percentage of support for the Affordable Care Act. The more conservative a state was, the less favorable it found the law, with few major exceptions. It also makes sense that where more people voted for Obama, more people support his policy.

library(tidyverse) #this opens the package tidyverse

library(skimr) #this opens the package skimr
# Read the emissions data set and store it under the shorter name `co2`
co2 <- read_csv(file = "emissions.csv")
## Rows: 708 Columns: 5
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## dbl (5): year, month, ppm_avg, ppm_avg_int, ppm_trend
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Quick look at the columns, their types, and their first few values
glimpse(x = co2)
## Rows: 708
## Columns: 5
## $ year        <dbl> 1959, 1959, 1959, 1959, 1959, 1959, 1959, 1959, 1959, 1959~
## $ month       <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1, 2, 3, 4, 5, 6, 7~
## $ ppm_avg     <dbl> 315.62, 316.38, 316.71, 317.72, 318.29, 318.15, 316.54, 31~
## $ ppm_avg_int <dbl> 315.62, 316.38, 316.71, 317.72, 318.29, 318.15, 316.54, 31~
## $ ppm_trend   <dbl> 315.70, 315.88, 315.62, 315.56, 315.50, 315.92, 315.66, 31~
# Per-variable summary statistics for the emissions data
skim(data = co2)
Data summary
Name co2
Number of rows 708
Number of columns 5
_______________________
Column type frequency:
numeric 5
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
year 0 1 1988.00 17.04 1959.00 1973.00 1988.00 2003.00 2017.00 ▇▇▇▇▇
month 0 1 6.50 3.45 1.00 3.75 6.50 9.25 12.00 ▇▅▅▅▇
ppm_avg 0 1 350.46 46.40 -99.99 328.88 351.36 374.72 409.65 ▁▁▁▁▇
ppm_avg_int 0 1 353.48 26.71 313.26 329.06 351.36 374.72 409.65 ▇▆▆▅▃
ppm_trend 0 1 353.48 26.65 315.50 330.02 351.63 374.86 407.59 ▇▅▅▃▃
library(dplyr)#this opens the dplyr package
# Collapse the monthly rows to one row per year, storing the yearly mean of
# `ppm_avg_int` in a new column `avg_co2`; the result is kept as `avgco2`
avgco2 <- co2 %>%
  group_by(year) %>%
  summarise(avg_co2 = mean(ppm_avg_int))
# Line chart of the yearly average CO2 value over time.
# The stray `width = 75` that used to sit inside aes() was removed: geom_line()
# has no width aesthetic, so the mapping did nothing.
ggplot(avgco2, aes(x = year, y = avg_co2)) +
  geom_line() +
  labs(
    x = "Year",
    y = "Average CO2 Emission",
    title = "Average CO2 emission by Year"
  ) +
  theme_bw()

#This graph plots CO2 emissions by year and shows how they have increased over time. The labs() function labels the x-axis, y-axis, and title. The theme makes the graph more comprehensible.
# One small line chart per year (facet), with the months on the x axis.
# `group = 1` is required because x is a factor: without it geom_line() would
# treat every month as its own group and draw no connecting line.
# theme_update() was removed from the chain: it changes the session-wide
# default theme rather than styling this plot, so it does not belong in a `+`
# pipeline. The plot keeps the default theme, exactly as it rendered before.
# The y label now spells out ppm correctly as "Parts per Million".
ggplot(co2, aes(x = factor(month), y = ppm_avg_int, group = 1)) +
  geom_line() +
  facet_wrap(~year) +
  labs(
    x = "Month",
    y = "Parts per Million",
    title = "Emissions over time"
  )

#This chunk creates a faceted line graph covering 1959 to 2017. Each year gets its own calendar-style panel broken down by month, which shows the slow increase of emissions over time. I increased the figure width and height so that I could clearly see the months at the bottom of the graph. The labs() function labels the x and y axes.

It was interesting how steady the rise of emissions has been. Clearly time is positively correlated with emissions, and if this trend continues the environment will continue to suffer.

######################
## Assignment #2 #####
######################

# Let's end with something cool

# Make sure you install the usmap package before running the script #
# This assumes you have imported health.csv dataset to an object called health #

# load require packages
library(tidyverse)
library(usmap)

# US map with each state filled by its life expectancy
# (white = low end of the scale, red = high end); state borders drawn in red.
# `labels` is spelled out in full: the previous `label =` only worked through
# R's partial argument matching, which breaks as soon as the scale function
# gains another argument starting with "label".
plot_usmap(data = health, values = "life_expectancy", color = "red") +
  scale_fill_continuous(
    low = "white",
    high = "red",
    name = "Life Expectancy",
    labels = scales::comma
  ) +
  theme(legend.position = "right")