# R_Home_Work

#1. Reading the CSV file from github
# Download the Election08 CSV and store it as a plain data.frame.
mydata <- data.frame(
  fread('https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/Stat2Data/Election08.csv')
)

# 2. Overview of every column in the full data set.
summary(mydata)

##       V1               State               Abr                Income     
##  Length:51          Length:51          Length:51          Min.   :28845  
##  Class :character   Class :character   Class :character   1st Qu.:33536  
##  Mode  :character   Mode  :character   Mode  :character   Median :36047  
##                                                           Mean   :37642  
##                                                           3rd Qu.:40544  
##                                                           Max.   :61092  
##        HS              BA           Dem.Rep          ObamaWin     
##  Min.   :78.50   Min.   :17.30   Min.   :-23.00   Min.   :0.0000  
##  1st Qu.:83.00   1st Qu.:24.20   1st Qu.:  3.00   1st Qu.:0.0000  
##  Median :87.00   Median :25.80   Median : 12.00   Median :1.0000  
##  Mean   :86.00   Mean   :27.15   Mean   : 12.31   Mean   :0.5686  
##  3rd Qu.:89.05   3rd Qu.:29.65   3rd Qu.: 19.00   3rd Qu.:1.0000  
##  Max.   :91.20   Max.   :47.50   Max.   : 75.00   Max.   :1.0000

#Then display the mean and median for at least two attributes.
# NOTE(review): meanandMedian() is not defined in the visible part of this file;
# judging by the recorded output it presumably prints the mean of its first
# argument and the median of its second — confirm against its definition.
meanandMedian(mydata$Income,mydata$HS)

## [1] 37641.57
## [1] 87

# 3. Build a new data frame holding only the rows whose Income exceeds 40000
# and whose ObamaWin flag equals 1; all columns are kept.
newData <- subset(mydata, Income > 40000 & ObamaWin == 1)
# Overview of the filtered data frame.
summary(newData)

##       V1               State               Abr                Income     
##  Length:15          Length:15          Length:15          Min.   :40322  
##  Class :character   Class :character   Class :character   1st Qu.:40821  
##  Mode  :character   Mode  :character   Mode  :character   Median :41512  
##                                                           Mean   :45015  
##                                                           3rd Qu.:48234  
##                                                           Max.   :61092  
##        HS              BA           Dem.Rep         ObamaWin
##  Min.   :80.20   Min.   :21.80   Min.   : 9.00   Min.   :1  
##  1st Qu.:85.70   1st Qu.:29.90   1st Qu.:14.00   1st Qu.:1  
##  Median :87.40   Median :32.50   Median :19.00   Median :1  
##  Mean   :86.88   Mean   :32.68   Mean   :23.27   Mean   :1  
##  3rd Qu.:88.65   3rd Qu.:34.85   3rd Qu.:26.00   3rd Qu.:1  
##  Max.   :91.00   Max.   :47.50   Max.   :75.00   Max.   :1

# Print every row of the filtered data frame.
print(newData)

##    V1                State Abr Income   HS   BA Dem.Rep ObamaWin
## 5   5          California   CA  41571 80.2 29.5      19        1
## 6   6            Colorado   CO  41042 88.9 35.0      11        1
## 7   7         Connecticut   CT  54117 88.0 34.7      26        1
## 8   8            Delaware   DE  40608 87.4 26.1      23        1
## 9   9 District of Columbia  DC  61092 85.7 47.5      75        1
## 14 14            Illinois   IL  40322 85.7 29.5      24        1
## 21 21            Maryland   MD  46021 87.4 35.2      26        1
## 22 22       Massachusetts   MA  49082 88.4 37.9      34        1
## 24 24           Minnesota   MN  41034 91.0 31.0      15        1
## 29 29              Nevada   NV  40480 83.7 21.8      11        1
## 30 30       New Hampshire   NH  41512 90.5 32.5      13        1
## 31 31          New Jersey   NJ  49194 87.0 33.9      19        1
## 33 33            New York   NY  47385 84.1 31.7      27        1
## 47 47            Virginia   VA  41347 85.9 33.6       9        1
## 48 48          Washington   WA  40414 89.3 30.3      17        1

# 4. Copy the filtered data frame, renaming Income -> fortyKIncome and
# ObamaWin -> BarackWin; the remaining columns keep their names.
modata <- plyr::rename(
  newData,
  replace = c("Income" = "fortyKIncome", "ObamaWin" = "BarackWin")
)
print(modata)

##    V1                State Abr fortyKIncome   HS   BA Dem.Rep BarackWin
## 5   5          California   CA        41571 80.2 29.5      19         1
## 6   6            Colorado   CO        41042 88.9 35.0      11         1
## 7   7         Connecticut   CT        54117 88.0 34.7      26         1
## 8   8            Delaware   DE        40608 87.4 26.1      23         1
## 9   9 District of Columbia  DC        61092 85.7 47.5      75         1
## 14 14            Illinois   IL        40322 85.7 29.5      24         1
## 21 21            Maryland   MD        46021 87.4 35.2      26         1
## 22 22       Massachusetts   MA        49082 88.4 37.9      34         1
## 24 24           Minnesota   MN        41034 91.0 31.0      15         1
## 29 29              Nevada   NV        40480 83.7 21.8      11         1
## 30 30       New Hampshire   NH        41512 90.5 32.5      13         1
## 31 31          New Jersey   NJ        49194 87.0 33.9      19         1
## 33 33            New York   NY        47385 84.1 31.7      27         1
## 47 47            Virginia   VA        41347 85.9 33.6       9         1
## 48 48          Washington   WA        40414 89.3 30.3      17         1

#5. Use the summary function to create an overview of your new data frame. Then print the mean
#and median for the same two attributes. Please compare.
summary(modata)

##       V1               State               Abr             fortyKIncome  
##  Length:15          Length:15          Length:15          Min.   :40322  
##  Class :character   Class :character   Class :character   1st Qu.:40821  
##  Mode  :character   Mode  :character   Mode  :character   Median :41512  
##                                                           Mean   :45015  
##                                                           3rd Qu.:48234  
##                                                           Max.   :61092  
##        HS              BA           Dem.Rep        BarackWin
##  Min.   :80.20   Min.   :21.80   Min.   : 9.00   Min.   :1  
##  1st Qu.:85.70   1st Qu.:29.90   1st Qu.:14.00   1st Qu.:1  
##  Median :87.40   Median :32.50   Median :19.00   Median :1  
##  Mean   :86.88   Mean   :32.68   Mean   :23.27   Mean   :1  
##  3rd Qu.:88.65   3rd Qu.:34.85   3rd Qu.:26.00   3rd Qu.:1  
##  Max.   :91.00   Max.   :47.50   Max.   :75.00   Max.   :1

# NOTE(review): meanandMedian() is not defined in the visible part of this file;
# presumably it prints the mean of its first argument and the median of its
# second — confirm against its definition.
meanandMedian(modata$fortyKIncome,modata$HS)

## [1] 45014.73
## [1] 87.4

#6. Replace every "e" (matched case-insensitively) in the data frame with
# "Excellent", using gsub for the substring substitution.
# gsub() returns character vectors, so sapply() produces a character matrix:
# the numeric columns are no longer numeric in the result and the original
# row names are replaced by 1..n.
resultData <- as.data.frame(
  sapply(
    modata,
    function(column) gsub("e", "Excellent", column, ignore.case = TRUE)
  )
)
# Print the resulting modified data.
print(resultData)

##    V1                               State        Abr fortyKIncome   HS
## 1   5                         California          CA        41571 80.2
## 2   6                           Colorado          CO        41042 88.9
## 3   7                ConnExcellentcticut          CT        54117   88
## 4   8           DExcellentlawarExcellent  DExcellent        40608 87.4
## 5   9                District of Columbia         DC        61092 85.7
## 6  14                           Illinois          IL        40322 85.7
## 7  21                           Maryland          MD        46021 87.4
## 8  22              MassachusExcellenttts          MA        49082 88.4
## 9  24                  MinnExcellentsota          MN        41034   91
## 10 29                     NExcellentvada          NV        40480 83.7
## 11 30      NExcellentw HampshirExcellent          NH        41512 90.5
## 12 31 NExcellentw JExcellentrsExcellenty          NJ        49194   87
## 13 33                   NExcellentw York          NY        47385 84.1
## 14 47                           Virginia          VA        41347 85.9
## 15 48                         Washington          WA        40414 89.3
##      BA Dem.Rep BarackWin
## 1  29.5      19         1
## 2    35      11         1
## 3  34.7      26         1
## 4  26.1      23         1
## 5  47.5      75         1
## 6  29.5      24         1
## 7  35.2      26         1
## 8  37.9      34         1
## 9    31      15         1
## 10 21.8      11         1
## 11 32.5      13         1
## 12 33.9      19         1
## 13 31.7      27         1
## 14 33.6       9         1
## 15 30.3      17         1

# R_Home_Work_2
#
# dilipganesan
#
# January 13, 2017