# Load the congressional terms dataset straight from the GitHub repository
congress <- utils::read.csv(
  file = "https://raw.githubusercontent.com/tagensingh/SPS-DATA607-ASSIGNMENT-1/main/congress-terms.csv"
)
# summary(congress)
# Preview the first rows to verify that the import worked
utils::head(congress)
## congress chamber bioguide firstname middlename lastname suffix birthday
## 1 80 house M000112 Joseph Jefferson Mansfield 1861-02-09
## 2 80 house D000448 Robert Lee Doughton 1863-11-07
## 3 80 house S000001 Adolph Joachim Sabath 1866-04-04
## 4 80 house E000023 Charles Aubrey Eaton 1868-03-29
## 5 80 house L000296 William Lewis 1868-09-22
## 6 80 house G000017 James A. Gallagher 1869-01-16
## state party incumbent termstart age
## 1 TX D Yes 1947-01-03 85.9
## 2 NC D Yes 1947-01-03 83.2
## 3 IL D Yes 1947-01-03 80.7
## 4 NJ R Yes 1947-01-03 78.8
## 5 KY R No 1947-01-03 78.3
## 6 PA R No 1947-01-03 78.0
library(datasets)
library(ggplot2)
library(dplyr)
library("tidyverse")
# Confirm that read.csv() returned a plain data frame
class(congress)
## [1] "data.frame"
# Build the working subset: Congress number, chamber, member name, state,
# party and age.
# Columns are selected by their source names (as shown in the head(congress)
# output) rather than by position, so a change in the CSV column order raises
# an "undefined columns" error instead of silently picking the wrong fields.
# setNames() then applies the cleaned-up headers in a single step; the two
# vectors below are kept in matching order (source name -> new header).
congress2 <- setNames(
  congress[c(
    "congress", "chamber", "firstname", "lastname",
    "state", "party", "age"
  )],
  c(
    "Congress_No", "Chamber", "First_Name", "Last_Name",
    "State", "Party", "Age"
  )
)
# Preview the subset with its new headers
head(congress2)
## Congress_No Chamber First_Name Last_Name State Party Age
## 1 80 house Joseph Mansfield TX D 85.9
## 2 80 house Robert Doughton NC D 83.2
## 3 80 house Adolph Sabath IL D 80.7
## 4 80 house Charles Eaton NJ R 78.8
## 5 80 house William Lewis KY R 78.3
## 6 80 house James Gallagher PA R 78.0
#summary(congress2)
# Keep only the Congress number and member age, grouped by Congress number
congress_age <- group_by(
  congress2[, c("Congress_No", "Age")],
  Congress_No
)
# Preview the grouped result
head(congress_age)
## # A tibble: 6 x 2
## # Groups: Congress_No [1]
## Congress_No Age
## <int> <dbl>
## 1 80 85.9
## 2 80 83.2
## 3 80 80.7
## 4 80 78.8
## 5 80 78.3
## 6 80 78
#class(congress_age)
# Average member age for each Congress.
# aggregate() is called with an unnamed grouping list and a bare vector, so
# the result carries the default headers "Group.1" and "x".
congress_meanage <- with(
  congress_age,
  aggregate(
    x = Age,                  # values to average
    by = list(Congress_No),   # grouping indicator
    FUN = mean
  )
)
# Show the default headers before they are replaced
colnames(congress_meanage)
## [1] "Group.1" "x"
# Replace both default headers with descriptive names in one assignment
names(congress_meanage) <- c("Congress_No", "Average_Age")
head(congress_meanage)
## Congress_No Average_Age
## 1 80 52.49586
## 2 81 52.64147
## 3 82 53.23279
## 4 83 53.16571
## 5 84 53.41415
## 6 85 54.16892
# Line chart of the average member age across successive Congresses
ggplot(
  data = congress_meanage,
  mapping = aes(x = Congress_No, y = Average_Age)
) +
  geom_line()
