# Method 1: choose the CSV interactively. file.choose() supplies the path
# that read.csv() parses into the data frame `stats`.
stats <- read.csv(file = file.choose())
# Auto-print the freshly loaded data frame to inspect it.
stats
#Method 2: Set working directory and read data
#Windows
#setwd("C:\\Users\\erickinsley\\Documents\\School\\R Programming A-Z\\Section 5 Data frames\\Demographic_Analysis")
#Mac
#setwd("/Users/erickinsley/Documents/School/R Programming A-Z/Section 5 Data frames/Demographic_Analysis")
#getwd()
#rm(stats)
#stats <- read.csv("DemographicData.csv")
#----------------Exploring data
# Print the whole data frame.
stats
# Number of rows; the captured output below shows 195.
nrow(stats)
[1] 195
# Number of columns; the captured output below shows 5.
ncol(stats)
[1] 5
# First and last rows with the default row count, then with an explicit n.
head(stats)
tail(stats)
head(stats, n=10)
tail(stats , n=6)
# Compact structure: one line per column with its type and first values.
str(stats)
'data.frame': 195 obs. of 5 variables:
$ Country.Name : chr "Aruba" "Afghanistan" "Angola" "Albania" ...
$ Country.Code : chr "ABW" "AFG" "AGO" "ALB" ...
$ Birth.rate : num 10.2 35.3 46 12.9 11 ...
$ Internet.users: num 78.9 5.9 19.1 57.2 88 ...
$ Income.Group : chr "High income" "Low income" "Upper middle income" "Upper middle income" ...
# Mnemonic: str is short for structure.
# Mnemonic: runif reads as r unif (random uniform).
# Per-column summary; the captured output follows.
summary(stats)
Country.Name Country.Code Birth.rate Internet.users Income.Group
Length:195 Length:195 Min. : 7.90 Min. : 0.90 Length:195
Class :character Class :character 1st Qu.:12.12 1st Qu.:14.52 Class :character
Mode :character Mode :character Median :19.68 Median :41.00 Mode :character
Mean :21.47 Mean :42.08
3rd Qu.:29.76 3rd Qu.:66.22
Max. :49.66 Max. :96.55
#----------------Using the $ sign
# Print the full data frame and its first rows for reference.
stats
head(stats)
# Single cell by position: row 3, column 3.
stats[3,3]
[1] 45.985
# Same cell addressed by column name; the output matches the line above.
stats[3,"Birth.rate"]
[1] 45.985
# The $ operator extracts a whole column as a vector (printed below).
stats$Internet.users
[1] 78.90000 5.90000 19.10000 57.20000 88.00000 59.90000 41.90000 63.40000 83.00000 80.61880 58.70000 1.30000
[13] 82.17020 4.90000 9.10000 6.63000 53.06150 90.00004 72.00000 57.79000 54.17000 33.60000 95.30000 36.94000
[25] 51.04000 73.00000 64.50000 29.90000 15.00000 3.50000 85.80000 86.34000 66.50000 45.80000 8.40000 6.40000
[37] 6.60000 51.70000 6.50000 37.50000 45.96000 27.93000 74.10000 65.45480 74.11040 84.17000 9.50000 94.62970
[49] 45.90000 16.50000 40.35368 29.40000 0.90000 71.63500 79.40000 1.90000 91.51440 37.10000 81.91980 27.80000
[61] 9.20000 89.84410 43.30000 12.30000 1.60000 14.00000 3.10000 16.40000 59.86630 35.00000 65.80000 19.70000
[73] 65.40000 35.00000 74.20000 17.80000 66.74760 10.60000 72.64390 14.94000 15.10000 78.24770 29.95000 9.20000
[85] 96.54680 70.80000 58.45930 37.10000 41.00000 89.71000 54.00000 39.00000 23.00000 6.80000 11.50000 84.77000
[97] 75.46000 12.50000 70.50000 3.20000 16.50000 46.20000 93.80000 21.90000 5.00000 68.45290 93.77650 75.23440
[109] 65.80000 56.00000 45.00000 3.00000 44.10000 43.46000 65.24000 3.50000 68.91380 1.60000 60.31000 20.00000
[121] 5.40000 6.20000 39.00000 5.05000 66.97000 13.90000 66.00000 1.70000 38.00000 15.50000 93.95640 95.05340
[133] 13.30000 82.78000 66.45000 10.90000 44.03000 39.20000 37.00000 6.50000 62.84920 73.90000 62.09560 36.90000
[145] 56.80000 85.30000 49.76450 67.97000 9.00000 60.50000 22.70000 13.10000 81.00000 8.00000 1.70000 23.10930
[157] 1.50000 51.50000 14.10000 23.00000 37.40000 77.88260 72.67560 94.78360 24.70000 50.40000 26.20000 2.30000
[169] 4.50000 28.94000 16.00000 9.60000 1.10000 35.00000 63.80000 43.80000 46.25000 4.40000 16.20000 41.00000
[181] 57.69000 84.20000 38.20000 52.00000 54.90000 45.30000 43.90000 11.30000 46.60000 15.30000 20.00000 46.50000
[193] 2.20000 15.40000 18.50000
# A column extracted with $ is a vector, so it can be indexed directly:
# this is the second value of Internet.users.
stats$Internet.users[2]
[1] 5.9
# levels() prints NULL here because Income.Group is a character column
# (the str() output earlier in this file lists it as chr), not a factor.
# NOTE(review): to list the categories, unique(stats$Income.Group) or
# levels(factor(stats$Income.Group)) would be needed - confirm the intent.
levels(stats$Income.Group)
NULL
#---------------- Basic operations with a data frame
# Row subsetting: leaving the column index empty keeps every column.
stats[1:10,] # rows 1 to 10
stats[3:9,]
stats[c(4,100),]
# Remember how [] works: selecting a single row still gives a data frame
# (see the TRUE below), while selecting a single column does not.
stats[1,]
is.data.frame(stats[1,])
[1] TRUE
# Single column by position; the result is printed below as a plain vector.
stats[,1]
[1] "Aruba" "Afghanistan" "Angola"
[4] "Albania" "United Arab Emirates" "Argentina"
[7] "Armenia" "Antigua and Barbuda" "Australia"
[10] "Austria" "Azerbaijan" "Burundi"
[13] "Belgium" "Benin" "Burkina Faso"
[16] "Bangladesh" "Bulgaria" "Bahrain"
[19] "Bahamas, The" "Bosnia and Herzegovina" "Belarus"
[22] "Belize" "Bermuda" "Bolivia"
[25] "Brazil" "Barbados" "Brunei Darussalam"
[28] "Bhutan" "Botswana" "Central African Republic"
[31] "Canada" "Switzerland" "Chile"
[34] "China" "Cote d'Ivoire" "Cameroon"
[37] "Congo, Rep." "Colombia" "Comoros"
[40] "Cabo Verde" "Costa Rica" "Cuba"
[43] "Cayman Islands" "Cyprus" "Czech Republic"
[46] "Germany" "Djibouti" "Denmark"
[49] "Dominican Republic" "Algeria" "Ecuador"
[52] "Egypt, Arab Rep." "Eritrea" "Spain"
[55] "Estonia" "Ethiopia" "Finland"
[58] "Fiji" "France" "Micronesia, Fed. Sts."
[61] "Gabon" "United Kingdom" "Georgia"
[64] "Ghana" "Guinea" "Gambia, The"
[67] "Guinea-Bissau" "Equatorial Guinea" "Greece"
[70] "Grenada" "Greenland" "Guatemala"
[73] "Guam" "Guyana" "Hong Kong SAR, China"
[76] "Honduras" "Croatia" "Haiti"
[79] "Hungary" "Indonesia" "India"
[82] "Ireland" "Iran, Islamic Rep." "Iraq"
[85] "Iceland" "Israel" "Italy"
[88] "Jamaica" "Jordan" "Japan"
[91] "Kazakhstan" "Kenya" "Kyrgyz Republic"
[94] "Cambodia" "Kiribati" "Korea, Rep."
[97] "Kuwait" "Lao PDR" "Lebanon"
[100] "Liberia" "Libya" "St. Lucia"
[103] "Liechtenstein" "Sri Lanka" "Lesotho"
[106] "Lithuania" "Luxembourg" "Latvia"
[109] "Macao SAR, China" "Morocco" "Moldova"
[112] "Madagascar" "Maldives" "Mexico"
[115] "Macedonia, FYR" "Mali" "Malta"
[118] "Myanmar" "Montenegro" "Mongolia"
[121] "Mozambique" "Mauritania" "Mauritius"
[124] "Malawi" "Malaysia" "Namibia"
[127] "New Caledonia" "Niger" "Nigeria"
[130] "Nicaragua" "Netherlands" "Norway"
[133] "Nepal" "New Zealand" "Oman"
[136] "Pakistan" "Panama" "Peru"
[139] "Philippines" "Papua New Guinea" "Poland"
[142] "Puerto Rico" "Portugal" "Paraguay"
[145] "French Polynesia" "Qatar" "Romania"
[148] "Russian Federation" "Rwanda" "Saudi Arabia"
[151] "Sudan" "Senegal" "Singapore"
[154] "Solomon Islands" "Sierra Leone" "El Salvador"
[157] "Somalia" "Serbia" "South Sudan"
[160] "Sao Tome and Principe" "Suriname" "Slovak Republic"
[163] "Slovenia" "Sweden" "Swaziland"
[166] "Seychelles" "Syrian Arab Republic" "Chad"
[169] "Togo" "Thailand" "Tajikistan"
[172] "Turkmenistan" "Timor-Leste" "Tonga"
[175] "Trinidad and Tobago" "Tunisia" "Turkey"
[178] "Tanzania" "Uganda" "Ukraine"
[181] "Uruguay" "United States" "Uzbekistan"
[184] "St. Vincent and the Grenadines" "Venezuela, RB" "Virgin Islands (U.S.)"
[187] "Vietnam" "Vanuatu" "West Bank and Gaza"
[190] "Samoa" "Yemen, Rep." "South Africa"
[193] "Congo, Dem. Rep." "Zambia" "Zimbabwe"
# Selecting one column with [ , j] drops to a plain vector by default,
# so the result is no longer a data frame.
is.data.frame(stats[, 1])
[1] FALSE
# drop = FALSE keeps the one-column result as a data frame. FALSE is spelled
# out because F is an ordinary variable that can be reassigned, whereas
# FALSE is a reserved word.
stats[, 1, drop = FALSE]
is.data.frame(stats[, 1, drop = FALSE])
[1] TRUE
# Column arithmetic is vectorised: the product is computed row by row and
# the resulting numeric vector is printed.
head(stats, n = 6)
with(stats, Birth.rate * Internet.users)
[1] 808.2516 207.9927 878.3135 736.5644 971.8720 1061.1884 557.6052 1042.7398 1095.6000 757.8167
[11] 1074.2100 57.3963 920.3062 178.5560 369.0141 133.5415 488.1658 1353.6006 1104.4080 523.6930
[21] 677.1250 775.8912 991.1200 895.2778 762.0782 889.7240 1058.1225 542.2066 379.0050 119.2660
[31] 935.2200 880.6680 890.1025 554.1800 313.4880 238.3104 244.2726 831.1292 223.1190 810.9375
[41] 690.4111 290.4720 926.2500 748.5411 755.9261 715.4450 242.1170 946.2970 972.9882 408.1770
[51] 850.2521 824.1408 31.3200 651.8785 817.8200 62.5575 979.2041 759.1773 1007.6135 653.6058
[61] 281.1060 1096.0980 577.2756 407.5113 59.7392 595.3500 116.2593 579.9368 508.8636 676.6900
[71] 954.1000 541.0605 1137.2406 660.9750 586.1800 384.3554 627.4274 268.6570 668.3239 303.2372
[81] 306.3941 1173.7155 536.1050 286.0556 1293.7271 1508.0400 496.9040 502.3340 1108.8860 735.6220
[91] 1227.4200 1372.5660 625.6000 166.3416 334.0060 729.0220 1552.5895 338.1375 946.5330 113.6672
[101] 353.5125 712.8660 862.9600 391.1997 143.6900 691.3743 1059.6744 767.3909 740.6448 1177.2880
[111] 546.3450 104.0580 945.8127 830.2598 732.1233 154.4830 654.6811 28.9904 700.5610 485.5000
[121] 214.4070 209.5662 425.1000 199.2680 1125.4308 416.1243 1122.0000 84.4237 1521.7100 322.2140
[131] 958.3553 1102.6194 278.2759 1086.0736 1356.8426 322.4438 866.5104 791.7616 880.2300 187.8435
[141] 603.3523 798.1200 490.5552 796.5972 931.1224 1018.4820 437.9276 897.2040 294.2010 1244.8480
[151] 759.9279 504.7823 753.3000 244.6240 62.4393 403.8581 65.8365 473.8000 523.4766 794.3510
[161] 690.2170 786.6143 741.2911 1118.4465 743.2971 937.4400 629.9266 105.2135 162.3600 319.5265
[171] 492.6720 204.6912 39.3305 889.3150 930.8420 867.2400 778.6650 173.8792 704.2788 455.1000
[181] 829.2361 1052.5000 859.5000 847.9120 1089.3258 484.7100 682.0743 302.1507 1416.3604 400.4316
[191] 658.9400 969.5250 93.2668 623.2534 660.7275
# Element-wise sum of the same two columns.
with(stats, Birth.rate + Internet.users)
[1] 89.14400 41.15300 65.08500 70.07700 99.04400 77.61600 55.20800 79.84700 96.20000 90.01880
[11] 77.00000 45.45100 93.37020 41.34000 49.65100 26.77200 62.26150 105.04004 87.33900 66.85200
[21] 66.67000 56.69200 105.70000 61.17600 65.97100 85.18800 80.90500 48.03400 40.26700 37.57600
[31] 96.70000 96.54000 79.88500 57.90000 45.72000 43.63600 43.61100 67.77600 40.82600 59.12500
[41] 60.98200 38.33000 86.60000 76.89080 84.31040 92.67000 34.98600 104.62970 67.09800 41.23800
[51] 61.42368 57.43200 35.70000 80.73500 89.70000 34.82500 102.21440 57.56300 94.21980 51.31100
[61] 39.75500 102.04410 56.63200 45.43100 38.93700 56.52500 40.60300 51.76200 68.36630 54.33400
[71] 80.30000 47.16500 82.78900 53.88500 82.10000 39.39300 76.14760 35.94500 81.84390 35.23700
[81] 35.39100 93.24770 47.85000 40.29300 109.94680 92.10000 66.95930 50.64000 68.04600 97.91000
[91] 76.73000 74.19400 50.20000 31.26200 40.54400 93.37000 96.03500 39.55100 83.92600 38.72100
[101] 37.92500 61.63000 103.00000 39.76300 33.73800 78.55290 105.07650 85.43440 77.05600 77.02300
[111] 57.14100 37.68600 65.54700 62.56400 76.46200 47.63800 78.41380 19.71900 71.92600 44.27500
[121] 45.10500 40.00100 49.90000 44.50900 83.77500 43.83700 83.00000 51.36100 78.04500 36.28800
[131] 104.15640 106.65340 34.22300 95.90000 86.86900 40.48200 63.71000 59.39800 60.79000 35.39900
[141] 72.44920 84.70000 69.99560 58.48800 73.19300 97.24000 58.56450 81.17000 41.68900 81.07600
[151] 56.17700 51.63300 90.30000 38.57800 38.42900 40.58530 45.39100 60.70000 51.22600 57.53700
[161] 55.85500 87.98260 82.87560 106.58360 54.79300 69.00000 50.24300 48.04500 40.58000 39.98100
[171] 46.79200 30.92200 36.85500 60.40900 78.39000 63.60000 63.08600 43.91800 59.67400 52.10000
[181] 72.06400 96.70000 60.70000 68.30600 74.74200 56.00000 59.43700 38.03900 76.99400 41.47200
[191] 52.94700 67.35000 44.59400 55.87100 54.21500
# Add a column: assigning to a name that does not exist yet creates it.
# Mycalc holds the row-by-row product of Birth.rate and Internet.users.
head(stats, n = 6)
stats[["Mycalc"]] <- stats[["Birth.rate"]] * stats[["Internet.users"]]
head(stats, n = 6)
# Remove a column: assigning NULL deletes it from the data frame.
head(stats, n = 6)
stats[["Mycalc"]] <- NULL
head(stats, n = 6)
#---------------- Filtering data frames
# Comparing a column with a scalar yields one TRUE/FALSE per row.
head(stats, n = 6)
stats[["Internet.users"]] < 2
[1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE
[19] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE
[55] FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[91] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[109] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[127] FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[145] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE TRUE FALSE FALSE FALSE FALSE FALSE
[163] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[181] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# Store the logical mask, then use it as the row index: only rows where the
# mask is TRUE are kept.
filter <- stats[["Internet.users"]] < 2
stats[filter, ]
# The mask can also be built inline.
stats[stats[["Birth.rate"]] > 40, ]
# Combine conditions element-wise with & (the vectorised AND).
stats[with(stats, Birth.rate > 40 & Internet.users < 2), ]
# Exact string matches on the character columns.
stats[stats[["Income.Group"]] == "High income", ]
stats[stats[["Country.Name"]] == "Malta", ]
#---------------- Introduction to qplot
# qplot() is provided by ggplot2, which nothing else in this file attaches.
# Load it here so the plots also work in a fresh session or when the
# notebook is knitted, instead of failing with "could not find function".
# NOTE(review): qplot() is deprecated as of ggplot2 3.4.0; it still works,
# but new code should prefer ggplot() + geom_*().
library(ggplot2)

# One numeric variable: qplot chooses a histogram.
qplot(data = stats, x = Internet.users)

# Two variables: points of Birth.rate for each Income.Group.
qplot(data = stats, x = Income.Group, y = Birth.rate)

# I() passes a constant as-is (a fixed point size) instead of mapping it
# to a scale with a legend.
qplot(data = stats, x = Income.Group, y = Birth.rate, size = I(3))

# Fixed size and fixed colour.
qplot(data = stats, x = Income.Group, y = Birth.rate, size = I(3),
      color = I("blue"))

# Same variables drawn as box plots instead of points.
qplot(data = stats, x = Income.Group, y = Birth.rate, geom = "boxplot")

#---------------- Visualizing
# Scatter plot of Birth.rate against Internet.users.
qplot(data = stats, x = Internet.users, y = Birth.rate)

# Larger points.
qplot(data = stats, x = Internet.users, y = Birth.rate, size = I(4))

# Larger points in a fixed colour.
qplot(data = stats, x = Internet.users, y = Birth.rate, color = I("red"),
      size = I(4))

# Colour mapped to a column (no I()), so each income group gets its own
# colour and a legend is drawn.
qplot(data = stats, x = Internet.users, y = Birth.rate, color = Income.Group,
      size = I(4))

LS0tCnRpdGxlOiAiRGVtb2dyYXBoaWMgQW5hbHlzaXMiCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCgoKYGBge3J9CgojTWV0aG9kIDE6IFNlbGVjdCB0aGUgZmlsZSBtYW51YWxseQpzdGF0cyA8LSByZWFkLmNzdihmaWxlLmNob29zZSgpKQpzdGF0cwoKI01ldGhvZCAyOiBTZXQgd29ya2luZyBkaXJlY3RvcnkgYW5kIHJlYWQgZGF0YQojV2luZG93cwojZ2V0d2QoIkM6XFxVc2Vyc1xcZXJpY2tpbnNsZXlcXERvY3VtZW50c1xcU2Nob29sXFxSIFByb2dyYW1taW5nIEEtWlxcU2VjdGlvbiA1IERhdGEgZnJhbWVzXFxEZW1vZ3JhcGhpY19BbmFseXNpcyIpCiNNYWMKI3NldHdkKCIvVXNlcnMvZXJpY2tpbnNsZXkvRG9jdW1lbnRzL1NjaG9vbC9SIFByb2dyYW1taW5nIEEtWi9TZWN0aW9uIDUgRGF0YSBmcmFtZXMvRGVtb2dyYXBoaWNfQW5hbHlzaXMiKQojZ2V0d2QoKQojcm0oc3RhdHMpCiNzdGF0cyA8LSByZWFkLmNzdigiRGVtb2dyYXBoaWNEYXRhLmNzdiIpCgojLS0tLS0tLS0tLS0tLS0tLUVwbG9yaW5nIGRhdGEKCnN0YXRzCm5yb3coc3RhdHMpCm5jb2woc3RhdHMpCmhlYWQoc3RhdHMpCnRhaWwoc3RhdHMpCmhlYWQoc3RhdHMsIG49MTApCnRhaWwoc3RhdHMgLCBuPTYpCnN0cihzdGF0cykKI3N0cihzdHJ1Y3R1cmUpCiNydW5pZihyIHVuaWYpCnN1bW1hcnkoc3RhdHMpCgojLS0tLS0tLS0tLS0tLS0tLVVzaW5nIHRoZSAkIHNpZ24Kc3RhdHMKaGVhZChzdGF0cykKc3RhdHNbMywzXQpzdGF0c1szLCJCaXJ0aC5yYXRlIl0Kc3RhdHMkSW50ZXJuZXQudXNlcnMKc3RhdHMkSW50ZXJuZXQudXNlcnNbMl0KbGV2ZWxzKHN0YXRzJEluY29tZS5Hcm91cCkKCiMtLS0tLS0tLS0tLS0tLS0tIEJhc2ljIG9wZXJhdGlvbnMgd2l0aCBhIGRhdGEgZnJhbWUKc3RhdHNbMToxMCxdICNzdWJzZXR0aW5nCnN0YXRzWzM6OSxdCnN0YXRzW2MoNCwxMDApLF0KI1JlbWViZXIgb2YgW10gd29yawpzdGF0c1sxLF0KaXMuZGF0YS5mcmFtZShzdGF0c1sxLF0pCnN0YXRzWywxXQppcy5kYXRhLmZyYW1lKHN0YXRzWywxXSkKc3RhdHNbLDEsZHJvcD1GXQppcy5kYXRhLmZyYW1lKHN0YXRzWywxLGRyb3A9Rl0pCiNtdWx0aXBseSBjb2x1bW5zCmhlYWQoc3RhdHMpCnN0YXRzJEJpcnRoLnJhdGUgKiBzdGF0cyRJbnRlcm5ldC51c2VycwpzdGF0cyRCaXJ0aC5yYXRlICsgc3RhdHMkSW50ZXJuZXQudXNlcnMKI2FkZCBjb2x1bW4KaGVhZChzdGF0cykKc3RhdHMkTXljYWxjIDwtIHN0YXRzJEJpcnRoLnJhdGUgKiBzdGF0cyRJbnRlcm5ldC51c2VycwpoZWFkKHN0YXRzKQojcmVtb3ZlIGNvbHVtbgpoZWFkKHN0YXRzKQpzdGF0cyRNeWNhbGMgPC0gTlVMTApoZWFkKHN0YXRzKQoKIy0tLS0tLS0tLS0tLS0tLS0gRmlsdGVyaW5nIGRhdGEgZnJhbWVzCmhlYWQoc3RhdHMpCnN0YXRzJEludGVybmV0LnVzZXJzIDwgMgpmaWx0ZXIgPC0gc3RhdHMkSW50ZXJuZXQudXNlcnMgPCAyCnN0YXRzW2ZpbHRlcixdCnN0YXRzW3N0YXRzJEJpcnRoLnJhdGUgPiA0MCxdCnN0
YXRzW3N0YXRzJEJpcnRoLnJhdGUgPiA0MCAmIHN0YXRzJEludGVybmV0LnVzZXJzIDwgMixdCnN0YXRzW3N0YXRzJEluY29tZS5Hcm91cCA9PSAiSGlnaCBpbmNvbWUiLF0Kc3RhdHNbc3RhdHMkQ291bnRyeS5OYW1lID09ICJNYWx0YSIsXQoKIy0tLS0tLS0tLS0tLS0tLS0gSW50cm9kdWN0aW9uIHRvIHEgcGxvdApxcGxvdChkYXRhPXN0YXRzLCB4PUludGVybmV0LnVzZXJzKQpxcGxvdChkYXRhPXN0YXRzLCB4PUluY29tZS5Hcm91cCwgeT1CaXJ0aC5yYXRlKQpxcGxvdChkYXRhPXN0YXRzLCB4PUluY29tZS5Hcm91cCwgeT1CaXJ0aC5yYXRlLCBzaXplPUkoMykpCnFwbG90KGRhdGE9c3RhdHMsIHg9SW5jb21lLkdyb3VwLCB5PUJpcnRoLnJhdGUsIHNpemU9SSgzKSwgY29sb3I9SSgiYmx1ZSIpKQpxcGxvdChkYXRhPXN0YXRzLCB4PUluY29tZS5Hcm91cCwgeT1CaXJ0aC5yYXRlLCBnZW9tPSJib3hwbG90IikKCiMtLS0tLS0tLS0tLS0tLS0tIFZpc3VhbGl6aW5nCnFwbG90KGRhdGE9c3RhdHMsIHg9SW50ZXJuZXQudXNlcnMsIHk9QmlydGgucmF0ZSkKcXBsb3QoZGF0YT1zdGF0cywgeD1JbnRlcm5ldC51c2VycywgeT1CaXJ0aC5yYXRlLCBzaXplPUkoNCkpCnFwbG90KGRhdGE9c3RhdHMsIHg9SW50ZXJuZXQudXNlcnMsIHk9QmlydGgucmF0ZSwgY29sb3I9SSgicmVkIiksc2l6ZT1JKDQpKQpxcGxvdChkYXRhPXN0YXRzLCB4PUludGVybmV0LnVzZXJzLCB5PUJpcnRoLnJhdGUsIGNvbG9yPUluY29tZS5Hcm91cCxzaXplPUkoNCkpCgoKCgoKCgoKCgoKYGBgCgoK