This should include summary statistics, means, medians, quartiles, or any other relevant information about the data set. Please include some conclusions in the R Markdown text
str(myGitHubData) # view structure of data
## 'data.frame': 9357 obs. of 15 variables:
## $ Date : POSIXct, format: "2004-03-10" "2004-03-10" ...
## $ Time : POSIXct, format: "1899-12-31 18:00:00" "1899-12-31 19:00:00" ...
## $ PT08.S1(CO) : num 1360 1292 1402 1376 1272 ...
## $ NMHC(GT) : num 150 112 88 80 51 38 31 31 24 19 ...
## $ C6H6(GT) : num 11.9 9.4 9 9.2 6.5 4.7 3.6 3.3 2.3 1.7 ...
## $ PT08.S2(NMHC): num 1046 955 939 948 836 ...
## $ NOx(GT) : num 166 103 131 172 131 89 62 62 45 -200 ...
## $ PT08.S3(NOx) : num 1056 1174 1140 1092 1205 ...
## $ NO2(GT) : num 113 92 114 122 116 96 77 76 60 -200 ...
## $ PT08.S4(NO2) : num 1692 1559 1555 1584 1490 ...
## $ PT08.S5(O3) : num 1268 972 1074 1203 1110 ...
## $ T : num 13.6 13.3 11.9 11 11.2 11.2 11.3 10.7 10.7 10.3 ...
## $ RH : num 48.9 47.7 54 60 59.6 59.2 56.8 60 59.7 60.2 ...
## $ AH : num 0.758 0.726 0.75 0.787 0.789 ...
## $ CO_GT : num 2.6 2 2.2 2.2 1.6 1.2 1.2 1 0.9 0.6 ...
summary(myGitHubData)
## Date Time
## Min. :2004-03-10 00:00:00 Min. :1899-12-31 00:00:00
## 1st Qu.:2004-06-16 00:00:00 1st Qu.:1899-12-31 05:00:00
## Median :2004-09-21 00:00:00 Median :1899-12-31 11:00:00
## Mean :2004-09-21 04:30:05 Mean :1899-12-31 11:29:55
## 3rd Qu.:2004-12-28 00:00:00 3rd Qu.:1899-12-31 18:00:00
## Max. :2005-04-04 00:00:00 Max. :1899-12-31 23:00:00
## PT08.S1(CO) NMHC(GT) C6H6(GT) PT08.S2(NMHC)
## Min. :-200 Min. :-200.0 Min. :-200.000 Min. :-200.0
## 1st Qu.: 921 1st Qu.:-200.0 1st Qu.: 4.000 1st Qu.: 711.0
## Median :1053 Median :-200.0 Median : 7.900 Median : 895.0
## Mean :1049 Mean :-159.1 Mean : 1.866 Mean : 894.6
## 3rd Qu.:1221 3rd Qu.:-200.0 3rd Qu.: 13.600 3rd Qu.:1105.0
## Max. :2040 Max. :1189.0 Max. : 63.700 Max. :2214.0
## NOx(GT) PT08.S3(NOx) NO2(GT) PT08.S4(NO2)
## Min. :-200.0 Min. :-200 Min. :-200.00 Min. :-200
## 1st Qu.: 50.0 1st Qu.: 637 1st Qu.: 53.00 1st Qu.:1185
## Median : 141.0 Median : 794 Median : 96.00 Median :1446
## Mean : 168.6 Mean : 795 Mean : 58.15 Mean :1391
## 3rd Qu.: 284.0 3rd Qu.: 960 3rd Qu.: 133.00 3rd Qu.:1662
## Max. :1479.0 Max. :2683 Max. : 340.00 Max. :2775
## PT08.S5(O3) T RH AH
## Min. :-200.0 Min. :-200.000 Min. :-200.00 Min. :-200.0000
## 1st Qu.: 700.0 1st Qu.: 10.900 1st Qu.: 34.10 1st Qu.: 0.6923
## Median : 942.0 Median : 17.200 Median : 48.60 Median : 0.9768
## Mean : 975.1 Mean : 9.778 Mean : 39.49 Mean : -6.8376
## 3rd Qu.:1255.0 3rd Qu.: 24.100 3rd Qu.: 61.90 3rd Qu.: 1.2962
## Max. :2523.0 Max. : 44.600 Max. : 88.70 Max. : 2.2310
## CO_GT
## Min. :-200.00
## 1st Qu.: 0.60
## Median : 1.50
## Mean : -34.21
## 3rd Qu.: 2.60
## Max. : 11.90
head(aggregate(myGitHubDataFrame[, 3:15], list(myGitHubDataFrame$Date), mean),n=10)
## Group.1 PT08.S1(CO) NMHC(GT) C6H6(GT) PT08.S2(NMHC) NOx(GT)
## 1 2004-03-10 1316.500 86.50000 8.450000 912.3333 132.0000
## 2 2004-03-11 1244.167 104.50000 7.979167 851.9583 130.0417
## 3 2004-03-12 1281.667 141.50000 12.129167 1008.2917 142.5833
## 4 2004-03-13 1330.667 139.25000 10.916667 992.8333 168.4167
## 5 2004-03-14 1361.125 116.95833 9.637500 943.9167 132.1667
## 6 2004-03-15 1452.167 236.62500 16.091667 1138.9167 197.8333
## 7 2004-03-16 1339.667 191.12500 13.500000 1061.9167 165.7083
## 8 2004-03-17 1362.833 207.45833 15.237500 1099.0833 183.2917
## 9 2004-03-18 1352.667 -62.83333 13.795833 1072.5833 160.8333
## 10 2004-03-19 1304.292 -200.00000 13.520833 1058.0000 164.2500
## PT08.S3(NOx) NO2(GT) PT08.S4(NO2) PT08.S5(O3) T RH
## 1 1167.3333 108.83333 1545.500 1096.000 12.03333 54.90000
## 2 1277.2500 87.37500 1522.833 885.250 9.83750 64.07500
## 3 1101.8750 89.91667 1627.292 1084.375 11.28750 51.09583
## 4 993.2083 105.58333 1595.792 1245.917 12.86667 51.53333
## 5 1001.2917 97.45833 1602.375 1234.208 16.01250 48.85000
## 6 845.6250 105.45833 1888.625 1439.083 17.36667 50.32083
## 7 939.2917 104.91667 1726.625 1322.625 18.62500 44.83750
## 8 930.6667 106.62500 1741.208 1405.875 18.77917 41.16667
## 9 901.1667 111.58333 1702.833 1301.000 17.76250 44.08750
## 10 913.3750 104.83333 1745.417 1095.792 14.19167 57.90417
## AH CO_GT
## 1 0.7656333 1.966667
## 2 0.7757667 -6.187500
## 3 0.6631042 -14.095833
## 4 0.7322958 -5.750000
## 5 0.8496708 -5.966667
## 6 0.9440292 -4.975000
## 7 0.8710208 -5.679167
## 8 0.8049167 -5.387500
## 9 0.8260167 -5.745833
## 10 0.9239583 -5.491667
head(aggregate(myGitHubDataFrame[, 3:15], list(myGitHubDataFrame$Date), median),n=10)
## Group.1 PT08.S1(CO) NMHC(GT) C6H6(GT) PT08.S2(NMHC) NOx(GT)
## 1 2004-03-10 1326.0 84.0 9.10 943.5 131.0
## 2 2004-03-11 1234.5 62.0 5.75 800.5 108.0
## 3 2004-03-12 1280.5 152.5 12.20 1057.0 161.5
## 4 2004-03-13 1327.0 119.0 10.75 1005.0 162.5
## 5 2004-03-14 1346.5 99.5 8.90 936.5 145.0
## 6 2004-03-15 1408.5 199.0 14.45 1130.5 165.0
## 7 2004-03-16 1398.0 209.5 15.80 1173.0 182.5
## 8 2004-03-17 1391.0 170.0 14.35 1129.0 182.0
## 9 2004-03-18 1379.0 -200.0 13.05 1085.0 151.5
## 10 2004-03-19 1334.0 -200.0 15.85 1174.0 195.0
## PT08.S3(NOx) NO2(GT) PT08.S4(NO2) PT08.S5(O3) T RH AH CO_GT
## 1 1157.0 113.5 1557.0 1092.0 11.55 56.60 0.77130 2.10
## 2 1259.5 96.5 1423.5 810.0 10.15 60.70 0.76255 1.65
## 3 961.5 119.5 1633.5 1086.5 11.55 54.75 0.64685 2.60
## 4 969.5 121.5 1601.0 1168.0 14.65 48.95 0.71655 2.65
## 5 947.5 110.5 1612.5 1243.5 15.50 51.25 0.85345 2.30
## 6 847.5 110.5 1803.5 1257.5 16.20 54.50 0.92000 2.85
## 7 856.5 128.5 1749.0 1362.5 17.85 44.40 0.87245 2.90
## 8 896.0 125.0 1695.5 1412.0 17.45 43.35 0.82595 2.60
## 9 888.0 127.0 1661.5 1254.0 15.40 49.50 0.82145 2.25
## 10 782.0 131.0 1827.5 1219.0 14.35 57.35 0.91310 3.15
head(aggregate(myGitHubDataFrame[, 3:15], list(myGitHubDataFrame$Date), quantile),n=10)
## Group.1 PT08.S1(CO).0% PT08.S1(CO).25% PT08.S1(CO).50%
## 1 2004-03-10 1197.00 1277.00 1326.00
## 2 2004-03-11 913.00 1087.00 1234.50
## 3 2004-03-12 831.00 1071.00 1280.50
## 4 2004-03-13 978.00 1193.00 1327.00
## 5 2004-03-14 1028.00 1228.00 1346.50
## 6 2004-03-15 1075.00 1235.25 1408.50
## 7 2004-03-16 929.00 1146.75 1398.00
## 8 2004-03-17 869.00 1147.75 1391.00
## 9 2004-03-18 931.00 1164.75 1379.00
## 10 2004-03-19 913.00 1164.75 1334.00
## PT08.S1(CO).75% PT08.S1(CO).100% NMHC(GT).0% NMHC(GT).25% NMHC(GT).50%
## 1 1372.00 1402.00 38.00 58.25 84.00
## 2 1337.50 1776.00 8.00 28.25 62.00
## 3 1513.75 1843.00 -200.00 38.25 152.50
## 4 1448.00 1621.00 27.00 64.25 119.00
## 5 1463.00 1898.00 27.00 73.50 99.50
## 6 1639.00 2040.00 39.00 66.75 199.00
## 7 1539.50 1800.00 17.00 49.75 209.50
## 8 1530.75 1975.00 11.00 74.75 170.00
## 9 1500.25 1934.00 -200.00 -200.00 -200.00
## 10 1448.25 1819.00 -200.00 -200.00 -200.00
## NMHC(GT).75% NMHC(GT).100% C6H6(GT).0% C6H6(GT).25% C6H6(GT).50%
## 1 106.00 150.00 4.700 7.125 9.100
## 2 108.75 461.00 1.100 3.050 5.750
## 3 209.75 488.00 1.000 5.525 12.200
## 4 195.75 284.00 2.600 7.100 10.750
## 5 145.75 341.00 2.400 7.225 8.900
## 6 334.50 685.00 3.900 6.975 14.450
## 7 283.50 437.00 2.000 5.325 15.800
## 8 287.00 577.00 1.600 6.975 14.350
## 9 42.50 506.00 2.500 8.000 13.050
## 10 -200.00 -200.00 1.300 6.675 15.850
## C6H6(GT).75% C6H6(GT).100% PT08.S2(NMHC).0% PT08.S2(NMHC).25%
## 1 9.350 11.900 750.00 861.75
## 2 9.925 27.400 512.00 657.50
## 3 17.975 32.600 501.00 786.25
## 4 14.275 19.600 625.00 860.00
## 5 11.900 23.100 615.00 866.25
## 6 21.550 39.200 703.00 854.50
## 7 19.250 25.100 585.00 780.50
## 8 21.325 38.400 554.00 854.50
## 9 17.925 35.800 623.00 899.25
## 10 19.500 31.300 525.00 843.75
## PT08.S2(NMHC).50% PT08.S2(NMHC).75% PT08.S2(NMHC).100% NOx(GT).0%
## 1 943.50 953.25 1046.00 89.00
## 2 800.50 975.00 1488.00 -200.00
## 3 1057.00 1238.50 1610.00 -200.00
## 4 1005.00 1126.00 1286.00 -200.00
## 5 936.50 1045.00 1381.00 -200.00
## 6 1130.50 1337.75 1754.00 -200.00
## 7 1173.00 1275.00 1431.00 -200.00
## 8 1129.00 1331.75 1737.00 -200.00
## 9 1085.00 1237.00 1682.00 -200.00
## 10 1174.00 1283.00 1582.00 -200.00
## NOx(GT).25% NOx(GT).50% NOx(GT).75% NOx(GT).100% PT08.S3(NOx).0%
## 1 110.00 131.00 157.25 172.00 1056.00
## 2 58.25 108.00 196.50 383.00 702.00
## 3 102.25 161.50 208.50 340.00 624.00
## 4 118.75 162.50 251.50 296.00 754.00
## 5 102.50 145.00 174.50 325.00 681.00
## 6 107.50 165.00 315.25 478.00 537.00
## 7 94.75 182.50 234.75 396.00 628.00
## 8 122.75 182.00 284.75 411.00 553.00
## 9 94.25 151.50 216.50 421.00 541.00
## 10 112.00 195.00 249.00 357.00 575.00
## PT08.S3(NOx).25% PT08.S3(NOx).50% PT08.S3(NOx).75% PT08.S3(NOx).100%
## 1 1104.00 1157.00 1197.25 1337.00
## 2 1061.25 1259.50 1508.75 1918.00
## 3 834.50 961.50 1287.25 1895.00
## 4 840.75 969.50 1099.00 1420.00
## 5 902.50 947.50 1089.00 1395.00
## 6 671.50 847.50 1000.50 1156.00
## 7 772.25 856.50 1114.00 1412.00
## 8 724.00 896.00 1045.00 1460.00
## 9 788.00 888.00 962.25 1293.00
## 10 703.25 782.00 1029.25 1620.00
## NO2(GT).0% NO2(GT).25% NO2(GT).50% NO2(GT).75% NO2(GT).100%
## 1 92.00 100.25 113.50 115.50 122.00
## 2 -200.00 72.00 96.50 128.75 172.00
## 3 -200.00 95.50 119.50 139.50 170.00
## 4 -200.00 92.75 121.50 144.25 165.00
## 5 -200.00 83.75 110.50 122.75 173.00
## 6 -200.00 92.00 110.50 146.00 187.00
## 7 -200.00 83.50 128.50 144.75 177.00
## 8 -200.00 88.50 125.00 137.50 194.00
## 9 -200.00 96.25 127.00 150.25 176.00
## 10 -200.00 98.25 131.00 144.50 166.00
## PT08.S4(NO2).0% PT08.S4(NO2).25% PT08.S4(NO2).50% PT08.S4(NO2).75%
## 1 1393.00 1506.25 1557.00 1577.75
## 2 1182.00 1318.75 1423.50 1662.00
## 3 1134.00 1371.50 1633.50 1860.25
## 4 1268.00 1450.25 1601.00 1755.25
## 5 1333.00 1505.75 1612.50 1668.75
## 6 1464.00 1561.25 1803.50 2109.00
## 7 1348.00 1479.00 1749.00 1891.00
## 8 1268.00 1500.50 1695.50 1985.50
## 9 1307.00 1534.25 1661.50 1814.50
## 10 1260.00 1529.50 1827.50 1955.50
## PT08.S4(NO2).100% PT08.S5(O3).0% PT08.S5(O3).25% PT08.S5(O3).50%
## 1 1692.00 949.00 997.50 1092.00
## 2 2333.00 422.00 653.00 810.00
## 3 2390.00 384.00 799.00 1086.50
## 4 1922.00 819.00 995.00 1168.00
## 5 2103.00 853.00 974.75 1243.50
## 6 2679.00 1010.00 1146.00 1257.50
## 7 2211.00 793.00 1012.50 1362.50
## 8 2535.00 667.00 1050.25 1412.00
## 9 2468.00 828.00 1100.50 1254.00
## 10 2456.00 370.00 863.75 1219.00
## PT08.S5(O3).75% PT08.S5(O3).100% T.0% T.25% T.50% T.75% T.100%
## 1 1179.75 1268.00 11.000 11.200 11.550 12.950 13.600
## 2 1053.25 1704.00 8.000 9.400 10.150 10.550 11.300
## 3 1419.50 1887.00 6.100 7.250 11.550 15.025 16.900
## 4 1482.75 1886.00 6.300 8.675 14.650 16.250 19.400
## 5 1372.00 1905.00 10.400 12.325 15.500 19.400 22.200
## 6 1729.75 2184.00 11.300 13.200 16.200 22.125 24.400
## 7 1526.00 2034.00 11.000 13.600 17.850 24.025 28.200
## 8 1710.50 2359.00 9.900 13.850 17.450 24.625 29.300
## 9 1555.25 2051.00 10.600 13.700 15.400 23.200 27.100
## 10 1405.75 1716.00 11.900 12.500 14.350 15.575 16.400
## RH.0% RH.25% RH.50% RH.75% RH.100% AH.0% AH.25% AH.50% AH.75%
## 1 47.700 50.175 56.600 59.500 60.000 0.725500 0.752100 0.771300 0.786225
## 2 56.200 59.300 60.700 67.650 81.100 0.665700 0.741475 0.762550 0.814600
## 3 34.300 38.100 54.750 62.450 65.900 0.619500 0.626175 0.646850 0.668425
## 4 31.300 40.200 48.950 63.950 71.900 0.688700 0.703450 0.716550 0.748050
## 5 28.400 37.900 51.250 60.275 67.600 0.751600 0.843150 0.853450 0.872900
## 6 28.900 35.075 54.500 61.925 70.500 0.873600 0.905525 0.920000 0.949300
## 7 18.600 27.075 44.400 62.375 68.500 0.701400 0.825575 0.872450 0.929775
## 8 14.900 26.350 43.350 56.050 65.200 0.523700 0.795750 0.825950 0.872700
## 9 17.800 28.575 49.500 58.075 64.200 0.627500 0.800825 0.821450 0.855775
## 10 47.100 50.525 57.350 66.425 71.100 0.864000 0.894625 0.913100 0.958475
## AH.100% CO_GT.0% CO_GT.25% CO_GT.50% CO_GT.75% CO_GT.100%
## 1 0.788800 1.200 1.700 2.100 2.200 2.600
## 2 0.877800 -200.000 1.000 1.650 2.375 6.900
## 3 0.777100 -200.000 1.400 2.600 3.550 6.600
## 4 0.819300 -200.000 1.850 2.650 3.300 4.200
## 5 0.934100 -200.000 1.750 2.300 2.825 5.900
## 6 1.094500 -200.000 1.800 2.850 4.525 8.100
## 7 1.049400 -200.000 1.275 2.900 3.725 5.300
## 8 0.929400 -200.000 1.500 2.600 4.150 7.600
## 9 0.960600 -200.000 1.475 2.250 3.325 6.600
## 10 1.002900 -200.000 1.575 3.150 4.000 6.200
As can be seen from the mean values of CO(GT) is worst in October, 2004 and best in Feb, 2005 for the given sample of data
Please perform some basic transformations. They will need to make sense but could include column renaming, creating a subset of the data, replacing values, or creating new columns with derived data (for example - if it makes sense you could sum two columns together)
myGitHubDataFrame <- myGitHubDataFrame %>% mutate(month=month(Date),year=year(Date),JulianDay=julian(as.Date(as.character(as.POSIXct(Date)))))
myGitHubDataFrame_month <- myGitHubDataFrame %>% group_by(month,year) %>% summarise(mean_CO_GT_month = mean(CO_GT))
myGitHubDataFrame_month
## # A tibble: 14 x 3
## # Groups: month [?]
## month year mean_CO_GT_month
## <dbl> <dbl> <dbl>
## 1 1 2005 -16.3
## 2 2 2005 -0.610
## 3 3 2004 -4.85
## 4 3 2005 -3.39
## 5 4 2004 -60.9
## 6 4 2005 -3.42
## 7 5 2004 -39.3
## 8 6 2004 -19.4
## 9 7 2004 -48.7
## 10 8 2004 -71.0
## 11 9 2004 -43.9
## 12 10 2004 -92.9
## 13 11 2004 -8.86
## 14 12 2004 -25.6
myGitHubDataFrame_day <- myGitHubDataFrame %>% group_by(Date) %>% summarise(mean_CO_GT_day = mean(CO_GT))
myGitHubDataFrame_day <- myGitHubDataFrame_day %>% mutate(year = year(Date), month = month(Date), day=as.Date(as.character(as.POSIXct(Date))) )
head(myGitHubDataFrame_day,n=10)
## # A tibble: 10 x 5
## Date mean_CO_GT_day year month day
## <dttm> <dbl> <dbl> <dbl> <date>
## 1 2004-03-10 00:00:00 1.97 2004 3 2004-03-10
## 2 2004-03-11 00:00:00 -6.19 2004 3 2004-03-11
## 3 2004-03-12 00:00:00 -14.1 2004 3 2004-03-12
## 4 2004-03-13 00:00:00 -5.75 2004 3 2004-03-13
## 5 2004-03-14 00:00:00 -5.97 2004 3 2004-03-14
## 6 2004-03-15 00:00:00 -4.97 2004 3 2004-03-15
## 7 2004-03-16 00:00:00 -5.68 2004 3 2004-03-16
## 8 2004-03-17 00:00:00 -5.39 2004 3 2004-03-17
## 9 2004-03-18 00:00:00 -5.75 2004 3 2004-03-18
## 10 2004-03-19 00:00:00 -5.49 2004 3 2004-03-19
myGitHubDataFrame_JulianDay <- myGitHubDataFrame_day %>% mutate(JulianDay = julian(day))
head(myGitHubDataFrame_JulianDay,n=10)
## # A tibble: 10 x 6
## Date mean_CO_GT_day year month day JulianDay
## <dttm> <dbl> <dbl> <dbl> <date> <dbl>
## 1 2004-03-10 00:00:00 1.97 2004 3 2004-03-10 12487
## 2 2004-03-11 00:00:00 -6.19 2004 3 2004-03-11 12488
## 3 2004-03-12 00:00:00 -14.1 2004 3 2004-03-12 12489
## 4 2004-03-13 00:00:00 -5.75 2004 3 2004-03-13 12490
## 5 2004-03-14 00:00:00 -5.97 2004 3 2004-03-14 12491
## 6 2004-03-15 00:00:00 -4.97 2004 3 2004-03-15 12492
## 7 2004-03-16 00:00:00 -5.68 2004 3 2004-03-16 12493
## 8 2004-03-17 00:00:00 -5.39 2004 3 2004-03-17 12494
## 9 2004-03-18 00:00:00 -5.75 2004 3 2004-03-18 12495
## 10 2004-03-19 00:00:00 -5.49 2004 3 2004-03-19 12496
Please make sure to display at least one scatter plot, box plot and histogram. Don’t be limited to this. Please explore the many other options in R packages such as ggplot2.
myGitHubDataFrame %>%
ggplot(aes(x = Date, y = CO_GT)) +
geom_point(color = "darkorchid4") +
labs(title = "Air Quality CO Daily",
subtitle = "The data frame is sent to the plot using pipes",
y = "Daily CO values",
x = "Date") + theme_bw(base_size = 20)
myGitHubDataFrame_month %>%
ggplot(aes(x = month, y = mean_CO_GT_month)) +
geom_point(color = "darkorchid4") +
labs(title = "Air Quality CO Monthly",
subtitle = "The data frame is sent to the plot using pipes",
y = "Monthly CO values",
x = "Month") + theme_bw(base_size = 20)
myGitHubDataFrame_day %>%
na.omit() %>%
ggplot(aes(x = day, y = mean_CO_GT_day)) +
geom_point(color = "darkorchid4") +
facet_wrap( ~ year ) +
labs(title = "Air Quality CO",
subtitle = "Use facets to plot by a variable - year in this case",
y = "Daily CO values",
x = "Date") + theme_bw(base_size = 20) +
# adjust the x axis breaks
scale_x_date(date_breaks = "5 years", date_labels = "%m-%Y")
myGitHubDataFrame_JulianDay %>%
ggplot(aes(x = JulianDay, y = mean_CO_GT_day)) +
geom_point(color = "darkorchid4") +
facet_wrap( ~ year, ncol = 3) +
labs(title = "Air Quality CO",
subtitle = "Data plotted by year",
y = "Daily CO values",
x = "Day of Year") + theme_bw(base_size = 15)
myGitHubDataFrame_JulianDay %>%
filter(JulianDay >= 12487 & JulianDay <= 12783) %>% ## origin date = "1970-01-01"
ggplot(aes(x = JulianDay, y = mean_CO_GT_day)) +
geom_bar(stat = "identity", fill = "darkorchid4") +
facet_wrap( ~ year, ncol = 3) +
labs(title = "Air Quality CO",
subtitle = "Data plotted by year",
y = "Daily CO values by year",
x = "Julian Date") + theme_bw(base_size = 15)
myGitHubDataFrame_JulianDay %>%
ggplot(aes(x = JulianDay, y = mean_CO_GT_day)) +
geom_bar(stat = "identity", fill = "darkorchid4") +
facet_wrap( ~ month, ncol = 3) +
labs(title = "Air Quality CO",
subtitle = "Data plotted by Month",
y = "Daily CO values by month",
x = "Julian Date") + theme_bw(base_size = 15)
myGitHubDataFrame_month %>%
ggplot(aes(x = month, y = mean_CO_GT_month)) +
geom_bar(stat = "identity", fill = "darkorchid4") +
facet_wrap(~ year, ncol = 3) +
labs(title = "Monthly Air Quality CO",
subtitle = "Data plotted by Year",
y = "Monthly CO values",
x = "Month") + theme_bw(base_size = 15)
myGitHubDataFrame_JulianDay %>%
filter(JulianDay >= 12692 & JulianDay <= 12722) %>% ## origin date = "1970-01-01" - Month of October 2004
ggplot(aes(x = JulianDay, y = mean_CO_GT_day)) +
geom_bar(stat = "identity", fill = "darkorchid4") +
facet_wrap( ~ month, ncol = 3) +
labs(title = "Daily Air Quality CO",
subtitle = "Data plotted for October 2004 by Day",
y = "Daily CO values by month",
x = "Julian Date") + theme_bw(base_size = 15)
myGitHubDataFrame %>%
filter((JulianDay>=12693 & JulianDay<=12697) | (JulianDay>=12705 & JulianDay<=12710)) %>% #10/02/2004-10/06/2004 OR 10/14/20004-10/19/2004
ggplot(aes(x = Time, y = CO_GT)) +
geom_bar(stat = "identity", fill = "darkorchid4") +
facet_wrap( ~ JulianDay, ncol = 3) +
labs(title = "Hourly Air Quality CO",
subtitle = "Data plotted for October 2004 by Time",
y = "Daily CO values by month",
x = "Julian Date") + theme_bw(base_size = 5)
Please state at the beginning a meaningful question for analysis. Use the first three steps and anything else that would be helpful to answer the question you are posing from the data set you chose. Please write a brief conclusion paragraph in R markdown at the end.
As can be inferred from the latest yearly bar chart, the month October in 2004 had the worst Air Quality
As can be inferred from the latest monthly bar chart for October month, the days 2-6 and 14-19 were the worst days in the month of October 2004
As can be inferred from latest hourly bar chart for the worst days in October month, it seems entire day for all the 24 hours the air quality for CO content is the worst throughout the day