Setup

R Packages

# Attach the packages used in this assignment (each package only once).
# NOTE: plyr is attached after tidyverse (which attaches dplyr), so plyr's
# rename(), mutate(), summarise(), arrange(), ... mask the dplyr versions.
# Call dplyr::rename() etc. explicitly when the dplyr behaviour is wanted.
library(tidyverse)
library(readxl)
library(knitr)
library(plyr)

Importing Data

# Point the session at the project folder that holds the imported data files.
# The path is anchored at "~", so it resolves the same way regardless of the
# directory the session starts in; one setwd() call is sufficient.
# NOTE(review): setwd() ties the script to one machine's folder layout --
# consider an RStudio project or paths relative to the project root instead.
setwd("~/R/DATA607_Assg1_Trump Impeachment-Polls/Impeachement_dataset")
dir() # list the files in the working directory
## [1] "Assg1.Rmd"                  "Impeachement_dataset.Rproj"
## [3] "impeachment-polls.csv"      "impeachment-polls.xlsx"    
## [5] "impeachment_topline.csv"    "impeachment_topline.xlsx"  
## [7] "README.html"                "README.md"
getwd() # confirm the current working directory
## [1] "C:/Users/Petit Mandela/Documents/R/DATA607_Assg1_Trump Impeachment-Polls/Impeachement_dataset"
#git clone https://github.com/asmozo24/Impeachement_dataset.git

##Loading Data

##impeachment_topline # loading the data; this is the data that generates the plot
##str(impeachment_topline) # what is the structure of the data, just to have a better look

##Making Changes in Data

##Impeachement_Data <- impeachment_topline # changing the name
##Impeachement_Data # checking if the name was effective

##Handling missing data

###is.na(Impeachement_Data) # checking whether there is any missing data in the dataset
#sum(is.na(Impeachement_Data)) # file too big to inspect cell by cell, so checking the total count of missing values (returns 16)
#na.omit(Impeachement_Data) # delete/remove the rows with missing data because they are incomplete observations
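##Impeachment_Data %>% rename(President_Name = president) # not working: the object name does not match `Impeachement_Data` used elsewhere, and plyr (attached after tidyverse) masks dplyr's rename()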
#rename(Impeachement_Data, c("president" = "President_Name", "subgroup" = "Voters", "party" = "Voters_Party", "category_group" = "Voters_Opinion", "modeldate" = "Model_Date", "yes_estimate" = "Yes_Percent", "no_estimate" = "No_Percent")) # Renaming variables

##Making Plots

#
#ggplot(data = Impeachement_Data, aes(x = timestamp, y = 'Yes_Percent')) + 
 # geom_point() # not working: data too big
#plot(timestamp, 'Yes_%', main = "Polynomial Regression", las = 1)