# Read GSS7402.csv from the working directory and print a per-column summary.
general_social <- read.csv(file = "GSS7402.csv")
summary(object = general_social)
## X kids age education year
## Min. : 1 Min. :0.000 Min. :18.00 Min. : 0.00 Min. :1974
## 1st Qu.:2281 1st Qu.:1.000 1st Qu.:31.00 1st Qu.:12.00 1st Qu.:1982
## Median :4560 Median :2.000 Median :43.00 Median :12.00 Median :1994
## Mean :4560 Mean :2.076 Mean :46.08 Mean :12.64 Mean :1990
## 3rd Qu.:6840 3rd Qu.:3.000 3rd Qu.:59.00 3rd Qu.:14.00 3rd Qu.:1998
## Max. :9120 Max. :8.000 Max. :89.00 Max. :20.00 Max. :2002
##
## siblings agefirstbirth ethnicity city16
## Min. : 0.000 Min. : 9.00 Length:9120 Length:9120
## 1st Qu.: 2.000 1st Qu.:19.00 Class :character Class :character
## Median : 3.000 Median :22.00 Mode :character Mode :character
## Mean : 4.051 Mean :22.63
## 3rd Qu.: 6.000 3rd Qu.:25.00
## Max. :35.000 Max. :42.00
## NA's :5808
## lowincome16 immigrant
## Length:9120 Length:9120
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
# Mean and median of age across the full data set
mean(general_social[["age"]])
## [1] 46.08202
median(general_social[["age"]])
## [1] 43
# Mean and median of years of education across the full data set
mean(general_social[["education"]])
## [1] 12.63509
median(general_social[["education"]])
## [1] 12
# Copy of the loaded data; it is not referenced again in the visible part of
# this script.
df_social <- data.frame(general_social)
# Keep the rows whose ethnicity is "other" and that have more than 8 siblings.
# Both conditions use bare column names, which subset() evaluates inside the
# data frame.
subset_social <- subset(
  general_social,
  subset = siblings > 8 & ethnicity == "other"
)
head(subset_social, n = 6L)
## X kids age education year siblings agefirstbirth ethnicity city16
## 18 18 1 52 14 2002 21 20 other no
## 44 44 1 49 15 2002 9 19 other yes
## 149 149 0 35 13 2002 15 NA other yes
## 152 152 5 48 14 2002 12 20 other no
## 155 155 2 25 11 2002 15 17 other no
## 198 198 3 52 5 2002 15 22 other yes
## lowincome16 immigrant
## 18 yes no
## 44 no no
## 149 no no
## 152 no no
## 155 yes no
## 198 no yes
# Rename every column of the subset except the row id `X`.
library(plyr)
names(subset_social)
## [1] "X" "kids" "age" "education"
## [5] "year" "siblings" "agefirstbirth" "ethnicity"
## [9] "city16" "lowincome16" "immigrant"
# The mapping is old name = "new name". Some of the new names ("#ofsiblings",
# "caucasian?", "non-citizen") are not syntactic R names, so they have to be
# accessed with backticks or [[ ]].
subset_social_rename <- plyr::rename(
  subset_social,
  replace = c(
    kids = "children",
    age = "yearsofage",
    education = "school",
    year = "years",
    siblings = "#ofsiblings",
    agefirstbirth = "age1stbirth",
    ethnicity = "caucasian?",
    city16 = "city",
    lowincome16 = "lowincome",
    immigrant = "non-citizen"
  )
)
names(subset_social_rename)
## [1] "X" "children" "yearsofage" "school" "years"
## [6] "#ofsiblings" "age1stbirth" "caucasian?" "city" "lowincome"
## [11] "non-citizen"
# Add a constant column `resident`; the single value "yes" is recycled to
# every row.
subset_social_rename[["resident"]] <- "yes"
head(subset_social_rename, n = 6L)
## X children yearsofage school years #ofsiblings age1stbirth caucasian?
## 18 18 1 52 14 2002 21 20 other
## 44 44 1 49 15 2002 9 19 other
## 149 149 0 35 13 2002 15 NA other
## 152 152 5 48 14 2002 12 20 other
## 155 155 2 25 11 2002 15 17 other
## 198 198 3 52 5 2002 15 22 other
## city lowincome non-citizen resident
## 18 no yes no yes
## 44 yes no no yes
## 149 yes no no yes
## 152 no no no yes
## 155 no yes no yes
## 198 yes no yes yes
# Per-column summary of the renamed subset, for comparison with the full data.
summary(object = subset_social_rename)
## X children yearsofage school years
## Min. : 18 Min. :0.000 Min. :19.00 Min. : 0.00 Min. :1974
## 1st Qu.:2712 1st Qu.:1.000 1st Qu.:32.00 1st Qu.: 9.00 1st Qu.:1982
## Median :4236 Median :2.000 Median :43.00 Median :12.00 Median :1990
## Mean :4519 Mean :2.894 Mean :46.12 Mean :11.17 Mean :1989
## 3rd Qu.:6206 3rd Qu.:4.000 3rd Qu.:59.00 3rd Qu.:13.00 3rd Qu.:1998
## Max. :9038 Max. :8.000 Max. :89.00 Max. :20.00 Max. :2002
##
## #ofsiblings age1stbirth caucasian? city
## Min. : 9.00 Min. :12.00 Length:339 Length:339
## 1st Qu.: 9.00 1st Qu.:18.00 Class :character Class :character
## Median :11.00 Median :20.00 Mode :character Mode :character
## Mean :11.64 Mean :21.02
## 3rd Qu.:13.00 3rd Qu.:23.00
## Max. :27.00 Max. :35.00
## NA's :214
## lowincome non-citizen resident
## Length:339 Length:339 Length:339
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
# Compare age and education between the subset and the original data set.
# The medians are identical (age 43, education 12). Mean age is almost
# unchanged (46.12 vs 46.08), while mean education is lower in the subset
# (11.17 vs 12.64).
mean(subset_social_rename[["yearsofage"]])
## [1] 46.12094
median(subset_social_rename[["yearsofage"]])
## [1] 43
mean(subset_social_rename[["school"]])
## [1] 11.16519
median(subset_social_rename[["school"]])
## [1] 12
mean(general_social[["age"]])
## [1] 46.08202
median(general_social[["age"]])
## [1] 43
mean(general_social[["education"]])
## [1] 12.63509
median(general_social[["education"]])
## [1] 12
# Distinct values of the `years` column before relabelling (numeric here).
unique(subset_social_rename$years)
## [1] 2002 1974 1978 1982 1986 1990 1994 1998
# Replace three year values with text labels, touching ONLY the `years` column.
# Indexing the whole data frame (df[df == "2002"] <- ...) compares every cell,
# so a row id in `X` equal to 2002, 1974 or 1978 would be overwritten too and
# `X` would silently become a character column.
year_labels <- c(
  "2002" = "00' baby",
  "1974" = "74' baby",
  "1978" = "78' baby"
)
survey_year <- as.character(subset_social_rename$years)
to_relabel <- survey_year %in% names(year_labels)
survey_year[to_relabel] <- unname(year_labels[survey_year[to_relabel]])
# The column ends up as character: the labels plus the untouched years as text.
subset_social_rename$years <- survey_year
unique(subset_social_rename$years)
## [1] "00' baby" "74' baby" "78' baby" "1982" "1986" "1990" "1994"
## [8] "1998"
# Print the first 100 rows to inspect the relabelled `years` column.
head(subset_social_rename, n = 100)
## X children yearsofage school years #ofsiblings age1stbirth
## 18 18 1 52 14 00' baby 21 20
## 44 44 1 49 15 00' baby 9 19
## 149 149 0 35 13 00' baby 15 NA
## 152 152 5 48 14 00' baby 12 20
## 155 155 2 25 11 00' baby 15 17
## 198 198 3 52 5 00' baby 15 22
## 228 228 2 40 14 00' baby 17 27
## 229 229 1 45 20 00' baby 10 22
## 247 247 1 26 12 00' baby 9 22
## 274 274 2 76 8 00' baby 15 23
## 288 288 4 42 11 00' baby 10 16
## 300 300 6 66 12 00' baby 9 16
## 315 315 0 76 12 00' baby 9 NA
## 332 332 4 30 11 00' baby 14 17
## 335 335 3 48 12 00' baby 10 22
## 357 357 5 88 8 00' baby 9 18
## 424 424 4 67 14 00' baby 9 22
## 460 460 3 36 14 00' baby 9 21
## 508 508 0 44 2 00' baby 12 NA
## 514 514 7 76 8 00' baby 11 17
## 531 531 6 67 6 00' baby 10 27
## 569 569 3 63 12 00' baby 10 18
## 595 595 1 71 9 00' baby 20 21
## 628 628 2 50 12 00' baby 11 21
## 673 673 3 61 13 00' baby 9 27
## 679 679 2 40 15 00' baby 12 20
## 680 680 6 62 9 00' baby 11 18
## 681 681 5 49 6 00' baby 9 17
## 704 704 7 83 5 00' baby 11 17
## 746 746 1 19 10 00' baby 10 18
## 790 790 4 28 20 00' baby 12 17
## 811 811 1 42 14 00' baby 9 22
## 814 814 5 33 13 00' baby 14 17
## 855 855 2 42 12 00' baby 19 21
## 974 974 0 41 8 00' baby 10 NA
## 975 975 3 54 15 00' baby 14 25
## 983 983 1 38 19 00' baby 9 26
## 996 996 4 51 14 00' baby 10 24
## 1024 1024 4 53 8 00' baby 12 16
## 1068 1068 8 70 7 00' baby 23 19
## 1107 1107 0 31 12 00' baby 9 NA
## 1157 1157 0 54 20 00' baby 9 NA
## 1200 1200 8 72 7 00' baby 16 19
## 1233 1233 7 73 6 00' baby 15 20
## 1238 1238 3 46 15 00' baby 9 18
## 1273 1273 0 27 15 00' baby 12 NA
## 1463 1463 7 43 14 00' baby 9 19
## 1524 1524 3 30 12 74' baby 12 NA
## 1536 1536 7 48 12 74' baby 10 NA
## 1618 1618 2 30 15 74' baby 10 NA
## 1635 1635 5 38 13 74' baby 13 NA
## 1677 1677 0 28 13 74' baby 9 NA
## 1690 1690 3 65 6 74' baby 10 NA
## 1700 1700 3 54 10 74' baby 9 NA
## 1711 1711 0 31 12 74' baby 10 NA
## 1712 1712 3 44 18 74' baby 9 NA
## 1739 1739 7 39 10 74' baby 15 NA
## 1765 1765 2 34 13 74' baby 9 NA
## 1766 1766 7 42 13 74' baby 11 NA
## 1798 1798 3 31 12 74' baby 10 NA
## 1811 1811 1 62 8 74' baby 10 NA
## 1821 1821 2 23 12 74' baby 18 NA
## 1844 1844 8 49 11 74' baby 12 NA
## 1883 1883 3 66 12 74' baby 16 NA
## 1901 1901 1 19 11 74' baby 12 NA
## 1993 1993 0 85 5 74' baby 9 NA
## 2075 2075 0 30 17 74' baby 10 NA
## 2110 2110 2 68 6 74' baby 16 NA
## 2218 2218 5 59 6 74' baby 15 NA
## 2222 2222 7 69 10 74' baby 9 NA
## 2247 2247 8 38 9 74' baby 12 NA
## 2250 2250 8 48 13 74' baby 9 NA
## 2334 2334 1 63 4 78' baby 12 NA
## 2335 2335 2 23 12 78' baby 10 NA
## 2360 2360 1 19 9 78' baby 14 NA
## 2464 2464 1 30 12 78' baby 11 NA
## 2507 2507 2 65 7 78' baby 14 NA
## 2515 2515 3 58 7 78' baby 10 NA
## 2525 2525 3 23 11 78' baby 10 NA
## 2526 2526 2 32 14 78' baby 12 NA
## 2551 2551 1 35 14 78' baby 13 NA
## 2568 2568 5 56 9 78' baby 9 NA
## 2689 2689 0 79 5 78' baby 10 NA
## 2690 2690 0 27 12 78' baby 10 NA
## 2709 2709 0 86 0 78' baby 13 NA
## 2714 2714 1 23 10 78' baby 11 NA
## 2758 2758 1 29 12 78' baby 9 NA
## 2759 2759 0 33 14 78' baby 14 NA
## 2883 2883 2 68 5 78' baby 9 NA
## 2890 2890 1 76 8 78' baby 14 NA
## 2951 2951 1 23 14 78' baby 22 NA
## 2973 2973 3 31 11 78' baby 14 NA
## 2975 2975 0 20 11 78' baby 12 NA
## 3088 3088 1 61 5 78' baby 16 NA
## 3098 3098 3 44 12 78' baby 10 NA
## 3235 3235 4 37 12 1982 11 NA
## 3277 3277 3 40 7 1982 13 NA
## 3289 3289 1 24 13 1982 11 NA
## 3331 3331 0 27 12 1982 9 NA
## 3355 3355 1 48 7 1982 10 NA
## caucasian? city lowincome non-citizen resident
## 18 other no yes no yes
## 44 other yes no no yes
## 149 other yes no no yes
## 152 other no no no yes
## 155 other no yes no yes
## 198 other yes no yes yes
## 228 other yes no yes yes
## 229 other no no no yes
## 247 other yes no yes yes
## 274 other no no no yes
## 288 other no no yes yes
## 300 other yes no no yes
## 315 other yes no no yes
## 332 other yes no no yes
## 335 other yes no yes yes
## 357 other no no no yes
## 424 other no yes no yes
## 460 other no yes no yes
## 508 other no yes yes yes
## 514 other no no no yes
## 531 other no no no yes
## 569 other no no no yes
## 595 other no no no yes
## 628 other yes no no yes
## 673 other no no no yes
## 679 other yes no yes yes
## 680 other yes no no yes
## 681 other no yes yes yes
## 704 other no no no yes
## 746 other yes no no yes
## 790 other no yes no yes
## 811 other yes no no yes
## 814 other no no no yes
## 855 other yes no no yes
## 974 other yes no no yes
## 975 other no no no yes
## 983 other no yes no yes
## 996 other yes no yes yes
## 1024 other no no no yes
## 1068 other no yes no yes
## 1107 other no no no yes
## 1157 other no yes yes yes
## 1200 other no no no yes
## 1233 other no no no yes
## 1238 other no no no yes
## 1273 other no no no yes
## 1463 other yes yes no yes
## 1524 other no yes no yes
## 1536 other no yes no yes
## 1618 other yes yes no yes
## 1635 other yes yes no yes
## 1677 other yes yes no yes
## 1690 other no yes no yes
## 1700 other yes yes no yes
## 1711 other no yes no yes
## 1712 other no yes no yes
## 1739 other no yes no yes
## 1765 other no yes no yes
## 1766 other no yes no yes
## 1798 other no yes no yes
## 1811 other yes yes no yes
## 1821 other no yes no yes
## 1844 other yes no no yes
## 1883 other no no no yes
## 1901 other yes no no yes
## 1993 other no yes no yes
## 2075 other no yes no yes
## 2110 other no yes no yes
## 2218 other no yes no yes
## 2222 other no yes no yes
## 2247 other no yes no yes
## 2250 other no yes no yes
## 2334 other no no no yes
## 2335 other yes no yes yes
## 2360 other yes yes no yes
## 2464 other no yes yes yes
## 2507 other no yes no yes
## 2515 other yes no no yes
## 2525 other no yes no yes
## 2526 other no yes no yes
## 2551 other no yes no yes
## 2568 other no no yes yes
## 2689 other no yes no yes
## 2690 other no no no yes
## 2709 other no no no yes
## 2714 other no yes no yes
## 2758 other yes yes no yes
## 2759 other no no no yes
## 2883 other no yes no yes
## 2890 other no yes no yes
## 2951 other yes no no yes
## 2973 other no yes no yes
## 2975 other no no no yes
## 3088 other no yes no yes
## 3098 other no yes no yes
## 3235 other no no no yes
## 3277 other no yes no yes
## 3289 other yes yes no yes
## 3331 other yes no no yes
## 3355 other no yes no yes
# Read GSS7402.csv directly from its raw GitHub URL instead of a local file.
social_github <- read.csv(
  file = "https://raw.githubusercontent.com/nk014914/R-Bridge-Class-Week-2-HW-CSV/main/GSS7402.csv"
)