1. Use the summary function to gain an overview of the data set. Then display the mean and median for at least two attributes.

# Read GSS7402.csv (path is relative to the working directory) and print
# per-column summary statistics for the resulting data frame.
general_social <- read.csv(file = "GSS7402.csv")
summary(object = general_social)
##        X             kids            age          education          year     
##  Min.   :   1   Min.   :0.000   Min.   :18.00   Min.   : 0.00   Min.   :1974  
##  1st Qu.:2281   1st Qu.:1.000   1st Qu.:31.00   1st Qu.:12.00   1st Qu.:1982  
##  Median :4560   Median :2.000   Median :43.00   Median :12.00   Median :1994  
##  Mean   :4560   Mean   :2.076   Mean   :46.08   Mean   :12.64   Mean   :1990  
##  3rd Qu.:6840   3rd Qu.:3.000   3rd Qu.:59.00   3rd Qu.:14.00   3rd Qu.:1998  
##  Max.   :9120   Max.   :8.000   Max.   :89.00   Max.   :20.00   Max.   :2002  
##                                                                               
##     siblings      agefirstbirth    ethnicity            city16         
##  Min.   : 0.000   Min.   : 9.00   Length:9120        Length:9120       
##  1st Qu.: 2.000   1st Qu.:19.00   Class :character   Class :character  
##  Median : 3.000   Median :22.00   Mode  :character   Mode  :character  
##  Mean   : 4.051   Mean   :22.63                                        
##  3rd Qu.: 6.000   3rd Qu.:25.00                                        
##  Max.   :35.000   Max.   :42.00                                        
##                   NA's   :5808                                         
##  lowincome16         immigrant        
##  Length:9120        Length:9120       
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
##                                       
## 
# Mean and median of age
mean(general_social[["age"]])
## [1] 46.08202
median(general_social[["age"]])
## [1] 43
# Mean and median of years of education
mean(general_social[["education"]])
## [1] 12.63509
median(general_social[["education"]])
## [1] 12

2. Create a new data frame with a subset of the columns and rows. Make sure to rename it.

# Copy of the full data set (not referenced again in the code shown here).
df_social <- data.frame(general_social)
# Keep only the rows whose ethnicity is 'other' and that report more than
# 8 siblings; subset() resolves both column names inside general_social.
subset_social <- subset(
  general_social,
  subset = siblings > 8 & ethnicity == "other"
)

# Preview the first rows of the subset
head(subset_social)
##       X kids age education year siblings agefirstbirth ethnicity city16
## 18   18    1  52        14 2002       21            20     other     no
## 44   44    1  49        15 2002        9            19     other    yes
## 149 149    0  35        13 2002       15            NA     other    yes
## 152 152    5  48        14 2002       12            20     other     no
## 155 155    2  25        11 2002       15            17     other     no
## 198 198    3  52         5 2002       15            22     other    yes
##     lowincome16 immigrant
## 18          yes        no
## 44           no        no
## 149          no        no
## 152          no        no
## 155         yes        no
## 198          no       yes

#3. Create new column names for the new data frame.

# Rename the existing columns (rename() comes from plyr)
library(plyr)
names(subset_social)
##  [1] "X"             "kids"          "age"           "education"    
##  [5] "year"          "siblings"      "agefirstbirth" "ethnicity"    
##  [9] "city16"        "lowincome16"   "immigrant"
# Each entry maps old column name -> new column name; "X" is not listed and
# therefore keeps its name. Note that "#ofsiblings", "caucasian?" and
# "non-citizen" are not syntactic R names, so they need backticks with `$`.
subset_social_rename <- rename(
  subset_social,
  replace = c(
    "kids" = "children",
    "age" = "yearsofage",
    "education" = "school",
    "year" = "years",
    "siblings" = "#ofsiblings",
    "agefirstbirth" = "age1stbirth",
    "ethnicity" = "caucasian?",
    "city16" = "city",
    "lowincome16" = "lowincome",
    "immigrant" = "non-citizen"
  )
)
names(subset_social_rename)
##  [1] "X"           "children"    "yearsofage"  "school"      "years"      
##  [6] "#ofsiblings" "age1stbirth" "caucasian?"  "city"        "lowincome"  
## [11] "non-citizen"
# Add a brand-new column, "resident", holding "yes" in every row
# (the single value is recycled to the number of rows).
subset_social_rename$resident <- "yes"
head(subset_social_rename, n = 6L)
##       X children yearsofage school years #ofsiblings age1stbirth caucasian?
## 18   18        1         52     14  2002          21          20      other
## 44   44        1         49     15  2002           9          19      other
## 149 149        0         35     13  2002          15          NA      other
## 152 152        5         48     14  2002          12          20      other
## 155 155        2         25     11  2002          15          17      other
## 198 198        3         52      5  2002          15          22      other
##     city lowincome non-citizen resident
## 18    no       yes          no      yes
## 44   yes        no          no      yes
## 149  yes        no          no      yes
## 152   no        no          no      yes
## 155   no       yes          no      yes
## 198  yes        no         yes      yes

4. Use the summary function to create an overview of your new data frame. Then print the mean and median for the same two attributes. Please compare.

# Per-column summary statistics for the renamed subset
summary(object = subset_social_rename)
##        X           children       yearsofage        school          years     
##  Min.   :  18   Min.   :0.000   Min.   :19.00   Min.   : 0.00   Min.   :1974  
##  1st Qu.:2712   1st Qu.:1.000   1st Qu.:32.00   1st Qu.: 9.00   1st Qu.:1982  
##  Median :4236   Median :2.000   Median :43.00   Median :12.00   Median :1990  
##  Mean   :4519   Mean   :2.894   Mean   :46.12   Mean   :11.17   Mean   :1989  
##  3rd Qu.:6206   3rd Qu.:4.000   3rd Qu.:59.00   3rd Qu.:13.00   3rd Qu.:1998  
##  Max.   :9038   Max.   :8.000   Max.   :89.00   Max.   :20.00   Max.   :2002  
##                                                                               
##   #ofsiblings     age1stbirth     caucasian?            city          
##  Min.   : 9.00   Min.   :12.00   Length:339         Length:339        
##  1st Qu.: 9.00   1st Qu.:18.00   Class :character   Class :character  
##  Median :11.00   Median :20.00   Mode  :character   Mode  :character  
##  Mean   :11.64   Mean   :21.02                                        
##  3rd Qu.:13.00   3rd Qu.:23.00                                        
##  Max.   :27.00   Max.   :35.00                                        
##                  NA's   :214                                          
##   lowincome         non-citizen          resident        
##  Length:339         Length:339         Length:339        
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
##                                                          
## 
# Comparison of the subset with the original data set: the medians of age (43)
# and education (12) are unchanged. Mean age is nearly identical (46.12 vs.
# 46.08), while mean education is roughly 1.5 years lower in the subset
# (11.17 vs. 12.64).
# Subset: age, then education
mean(subset_social_rename[["yearsofage"]])
## [1] 46.12094
median(subset_social_rename[["yearsofage"]])
## [1] 43
mean(subset_social_rename[["school"]])
## [1] 11.16519
median(subset_social_rename[["school"]])
## [1] 12
# Original data set: age, then education
mean(general_social[["age"]])
## [1] 46.08202
median(general_social[["age"]])
## [1] 43
mean(general_social[["education"]])
## [1] 12.63509
median(general_social[["education"]])
## [1] 12

5. For at least 3 values in a column please rename so that every value in that column is renamed. For example, suppose I have 20 values of the letter “e” in one column. Rename those values so that all 20 would show as “excellent”.

unique(subset_social_rename$years)
## [1] 2002 1974 1978 1982 1986 1990 1994 1998
# Relabel three of the survey years. The replacement is restricted to the
# `years` column: matching against the whole data frame would also overwrite
# any other cell that happens to hold the same value (e.g. a row id in `X`)
# and coerce that column to character as well.
# The first assignment turns `years` into a character column, so every
# comparison is written against the string form of the year.
# NOTE(review): 2002 is labelled "00' baby" while the other labels use the
# last two digits of the year -- confirm that this is intended.
subset_social_rename$years[subset_social_rename$years == "2002"] <- "00' baby"
subset_social_rename$years[subset_social_rename$years == "1974"] <- "74' baby"
subset_social_rename$years[subset_social_rename$years == "1978"] <- "78' baby"

unique(subset_social_rename$years)
## [1] "00' baby" "74' baby" "78' baby" "1982"     "1986"     "1990"     "1994"    
## [8] "1998"

6. Display enough rows to see examples of all of steps 1-5 above.

# Show the first 100 rows so the renamed columns, the added `resident`
# column and the relabelled `years` values are all visible.
head(subset_social_rename, n = 100L)
##         X children yearsofage school    years #ofsiblings age1stbirth
## 18     18        1         52     14 00' baby          21          20
## 44     44        1         49     15 00' baby           9          19
## 149   149        0         35     13 00' baby          15          NA
## 152   152        5         48     14 00' baby          12          20
## 155   155        2         25     11 00' baby          15          17
## 198   198        3         52      5 00' baby          15          22
## 228   228        2         40     14 00' baby          17          27
## 229   229        1         45     20 00' baby          10          22
## 247   247        1         26     12 00' baby           9          22
## 274   274        2         76      8 00' baby          15          23
## 288   288        4         42     11 00' baby          10          16
## 300   300        6         66     12 00' baby           9          16
## 315   315        0         76     12 00' baby           9          NA
## 332   332        4         30     11 00' baby          14          17
## 335   335        3         48     12 00' baby          10          22
## 357   357        5         88      8 00' baby           9          18
## 424   424        4         67     14 00' baby           9          22
## 460   460        3         36     14 00' baby           9          21
## 508   508        0         44      2 00' baby          12          NA
## 514   514        7         76      8 00' baby          11          17
## 531   531        6         67      6 00' baby          10          27
## 569   569        3         63     12 00' baby          10          18
## 595   595        1         71      9 00' baby          20          21
## 628   628        2         50     12 00' baby          11          21
## 673   673        3         61     13 00' baby           9          27
## 679   679        2         40     15 00' baby          12          20
## 680   680        6         62      9 00' baby          11          18
## 681   681        5         49      6 00' baby           9          17
## 704   704        7         83      5 00' baby          11          17
## 746   746        1         19     10 00' baby          10          18
## 790   790        4         28     20 00' baby          12          17
## 811   811        1         42     14 00' baby           9          22
## 814   814        5         33     13 00' baby          14          17
## 855   855        2         42     12 00' baby          19          21
## 974   974        0         41      8 00' baby          10          NA
## 975   975        3         54     15 00' baby          14          25
## 983   983        1         38     19 00' baby           9          26
## 996   996        4         51     14 00' baby          10          24
## 1024 1024        4         53      8 00' baby          12          16
## 1068 1068        8         70      7 00' baby          23          19
## 1107 1107        0         31     12 00' baby           9          NA
## 1157 1157        0         54     20 00' baby           9          NA
## 1200 1200        8         72      7 00' baby          16          19
## 1233 1233        7         73      6 00' baby          15          20
## 1238 1238        3         46     15 00' baby           9          18
## 1273 1273        0         27     15 00' baby          12          NA
## 1463 1463        7         43     14 00' baby           9          19
## 1524 1524        3         30     12 74' baby          12          NA
## 1536 1536        7         48     12 74' baby          10          NA
## 1618 1618        2         30     15 74' baby          10          NA
## 1635 1635        5         38     13 74' baby          13          NA
## 1677 1677        0         28     13 74' baby           9          NA
## 1690 1690        3         65      6 74' baby          10          NA
## 1700 1700        3         54     10 74' baby           9          NA
## 1711 1711        0         31     12 74' baby          10          NA
## 1712 1712        3         44     18 74' baby           9          NA
## 1739 1739        7         39     10 74' baby          15          NA
## 1765 1765        2         34     13 74' baby           9          NA
## 1766 1766        7         42     13 74' baby          11          NA
## 1798 1798        3         31     12 74' baby          10          NA
## 1811 1811        1         62      8 74' baby          10          NA
## 1821 1821        2         23     12 74' baby          18          NA
## 1844 1844        8         49     11 74' baby          12          NA
## 1883 1883        3         66     12 74' baby          16          NA
## 1901 1901        1         19     11 74' baby          12          NA
## 1993 1993        0         85      5 74' baby           9          NA
## 2075 2075        0         30     17 74' baby          10          NA
## 2110 2110        2         68      6 74' baby          16          NA
## 2218 2218        5         59      6 74' baby          15          NA
## 2222 2222        7         69     10 74' baby           9          NA
## 2247 2247        8         38      9 74' baby          12          NA
## 2250 2250        8         48     13 74' baby           9          NA
## 2334 2334        1         63      4 78' baby          12          NA
## 2335 2335        2         23     12 78' baby          10          NA
## 2360 2360        1         19      9 78' baby          14          NA
## 2464 2464        1         30     12 78' baby          11          NA
## 2507 2507        2         65      7 78' baby          14          NA
## 2515 2515        3         58      7 78' baby          10          NA
## 2525 2525        3         23     11 78' baby          10          NA
## 2526 2526        2         32     14 78' baby          12          NA
## 2551 2551        1         35     14 78' baby          13          NA
## 2568 2568        5         56      9 78' baby           9          NA
## 2689 2689        0         79      5 78' baby          10          NA
## 2690 2690        0         27     12 78' baby          10          NA
## 2709 2709        0         86      0 78' baby          13          NA
## 2714 2714        1         23     10 78' baby          11          NA
## 2758 2758        1         29     12 78' baby           9          NA
## 2759 2759        0         33     14 78' baby          14          NA
## 2883 2883        2         68      5 78' baby           9          NA
## 2890 2890        1         76      8 78' baby          14          NA
## 2951 2951        1         23     14 78' baby          22          NA
## 2973 2973        3         31     11 78' baby          14          NA
## 2975 2975        0         20     11 78' baby          12          NA
## 3088 3088        1         61      5 78' baby          16          NA
## 3098 3098        3         44     12 78' baby          10          NA
## 3235 3235        4         37     12     1982          11          NA
## 3277 3277        3         40      7     1982          13          NA
## 3289 3289        1         24     13     1982          11          NA
## 3331 3331        0         27     12     1982           9          NA
## 3355 3355        1         48      7     1982          10          NA
##      caucasian? city lowincome non-citizen resident
## 18        other   no       yes          no      yes
## 44        other  yes        no          no      yes
## 149       other  yes        no          no      yes
## 152       other   no        no          no      yes
## 155       other   no       yes          no      yes
## 198       other  yes        no         yes      yes
## 228       other  yes        no         yes      yes
## 229       other   no        no          no      yes
## 247       other  yes        no         yes      yes
## 274       other   no        no          no      yes
## 288       other   no        no         yes      yes
## 300       other  yes        no          no      yes
## 315       other  yes        no          no      yes
## 332       other  yes        no          no      yes
## 335       other  yes        no         yes      yes
## 357       other   no        no          no      yes
## 424       other   no       yes          no      yes
## 460       other   no       yes          no      yes
## 508       other   no       yes         yes      yes
## 514       other   no        no          no      yes
## 531       other   no        no          no      yes
## 569       other   no        no          no      yes
## 595       other   no        no          no      yes
## 628       other  yes        no          no      yes
## 673       other   no        no          no      yes
## 679       other  yes        no         yes      yes
## 680       other  yes        no          no      yes
## 681       other   no       yes         yes      yes
## 704       other   no        no          no      yes
## 746       other  yes        no          no      yes
## 790       other   no       yes          no      yes
## 811       other  yes        no          no      yes
## 814       other   no        no          no      yes
## 855       other  yes        no          no      yes
## 974       other  yes        no          no      yes
## 975       other   no        no          no      yes
## 983       other   no       yes          no      yes
## 996       other  yes        no         yes      yes
## 1024      other   no        no          no      yes
## 1068      other   no       yes          no      yes
## 1107      other   no        no          no      yes
## 1157      other   no       yes         yes      yes
## 1200      other   no        no          no      yes
## 1233      other   no        no          no      yes
## 1238      other   no        no          no      yes
## 1273      other   no        no          no      yes
## 1463      other  yes       yes          no      yes
## 1524      other   no       yes          no      yes
## 1536      other   no       yes          no      yes
## 1618      other  yes       yes          no      yes
## 1635      other  yes       yes          no      yes
## 1677      other  yes       yes          no      yes
## 1690      other   no       yes          no      yes
## 1700      other  yes       yes          no      yes
## 1711      other   no       yes          no      yes
## 1712      other   no       yes          no      yes
## 1739      other   no       yes          no      yes
## 1765      other   no       yes          no      yes
## 1766      other   no       yes          no      yes
## 1798      other   no       yes          no      yes
## 1811      other  yes       yes          no      yes
## 1821      other   no       yes          no      yes
## 1844      other  yes        no          no      yes
## 1883      other   no        no          no      yes
## 1901      other  yes        no          no      yes
## 1993      other   no       yes          no      yes
## 2075      other   no       yes          no      yes
## 2110      other   no       yes          no      yes
## 2218      other   no       yes          no      yes
## 2222      other   no       yes          no      yes
## 2247      other   no       yes          no      yes
## 2250      other   no       yes          no      yes
## 2334      other   no        no          no      yes
## 2335      other  yes        no         yes      yes
## 2360      other  yes       yes          no      yes
## 2464      other   no       yes         yes      yes
## 2507      other   no       yes          no      yes
## 2515      other  yes        no          no      yes
## 2525      other   no       yes          no      yes
## 2526      other   no       yes          no      yes
## 2551      other   no       yes          no      yes
## 2568      other   no        no         yes      yes
## 2689      other   no       yes          no      yes
## 2690      other   no        no          no      yes
## 2709      other   no        no          no      yes
## 2714      other   no       yes          no      yes
## 2758      other  yes       yes          no      yes
## 2759      other   no        no          no      yes
## 2883      other   no       yes          no      yes
## 2890      other   no       yes          no      yes
## 2951      other  yes        no          no      yes
## 2973      other   no       yes          no      yes
## 2975      other   no        no          no      yes
## 3088      other   no       yes          no      yes
## 3098      other   no       yes          no      yes
## 3235      other   no        no          no      yes
## 3277      other   no       yes          no      yes
## 3289      other  yes       yes          no      yes
## 3331      other  yes        no          no      yes
## 3355      other   no       yes          no      yes

7. BONUS: Place the original .csv in a GitHub repository and have R read it from the link. This will be a very useful skill as you progress in your data science education and career.

# Read the copy of GSS7402.csv hosted on GitHub directly from its raw URL
social_github <- read.csv(
  file = "https://raw.githubusercontent.com/nk014914/R-Bridge-Class-Week-2-HW-CSV/main/GSS7402.csv"
)