Data Exploration

This should include summary statistics, means, medians, quartiles, or any other relevant information about the data set. Please include some conclusions in the R Markdown text

# Show each column's type together with a preview of its first values.
str(object = myGitHubData)
## 'data.frame':    9357 obs. of  15 variables:
##  $ Date         : POSIXct, format: "2004-03-10" "2004-03-10" ...
##  $ Time         : POSIXct, format: "1899-12-31 18:00:00" "1899-12-31 19:00:00" ...
##  $ PT08.S1(CO)  : num  1360 1292 1402 1376 1272 ...
##  $ NMHC(GT)     : num  150 112 88 80 51 38 31 31 24 19 ...
##  $ C6H6(GT)     : num  11.9 9.4 9 9.2 6.5 4.7 3.6 3.3 2.3 1.7 ...
##  $ PT08.S2(NMHC): num  1046 955 939 948 836 ...
##  $ NOx(GT)      : num  166 103 131 172 131 89 62 62 45 -200 ...
##  $ PT08.S3(NOx) : num  1056 1174 1140 1092 1205 ...
##  $ NO2(GT)      : num  113 92 114 122 116 96 77 76 60 -200 ...
##  $ PT08.S4(NO2) : num  1692 1559 1555 1584 1490 ...
##  $ PT08.S5(O3)  : num  1268 972 1074 1203 1110 ...
##  $ T            : num  13.6 13.3 11.9 11 11.2 11.2 11.3 10.7 10.7 10.3 ...
##  $ RH           : num  48.9 47.7 54 60 59.6 59.2 56.8 60 59.7 60.2 ...
##  $ AH           : num  0.758 0.726 0.75 0.787 0.789 ...
##  $ CO_GT        : num  2.6 2 2.2 2.2 1.6 1.2 1.2 1 0.9 0.6 ...
# Per-column minimum, quartiles, mean and maximum.
# NOTE(review): every numeric column reports a minimum of -200, which looks
# like a placeholder for missing readings rather than a real measurement --
# TODO confirm before interpreting the means.
summary(object = myGitHubData)
##       Date                          Time                    
##  Min.   :2004-03-10 00:00:00   Min.   :1899-12-31 00:00:00  
##  1st Qu.:2004-06-16 00:00:00   1st Qu.:1899-12-31 05:00:00  
##  Median :2004-09-21 00:00:00   Median :1899-12-31 11:00:00  
##  Mean   :2004-09-21 04:30:05   Mean   :1899-12-31 11:29:55  
##  3rd Qu.:2004-12-28 00:00:00   3rd Qu.:1899-12-31 18:00:00  
##  Max.   :2005-04-04 00:00:00   Max.   :1899-12-31 23:00:00  
##   PT08.S1(CO)      NMHC(GT)         C6H6(GT)        PT08.S2(NMHC)   
##  Min.   :-200   Min.   :-200.0   Min.   :-200.000   Min.   :-200.0  
##  1st Qu.: 921   1st Qu.:-200.0   1st Qu.:   4.000   1st Qu.: 711.0  
##  Median :1053   Median :-200.0   Median :   7.900   Median : 895.0  
##  Mean   :1049   Mean   :-159.1   Mean   :   1.866   Mean   : 894.6  
##  3rd Qu.:1221   3rd Qu.:-200.0   3rd Qu.:  13.600   3rd Qu.:1105.0  
##  Max.   :2040   Max.   :1189.0   Max.   :  63.700   Max.   :2214.0  
##     NOx(GT)        PT08.S3(NOx)     NO2(GT)         PT08.S4(NO2) 
##  Min.   :-200.0   Min.   :-200   Min.   :-200.00   Min.   :-200  
##  1st Qu.:  50.0   1st Qu.: 637   1st Qu.:  53.00   1st Qu.:1185  
##  Median : 141.0   Median : 794   Median :  96.00   Median :1446  
##  Mean   : 168.6   Mean   : 795   Mean   :  58.15   Mean   :1391  
##  3rd Qu.: 284.0   3rd Qu.: 960   3rd Qu.: 133.00   3rd Qu.:1662  
##  Max.   :1479.0   Max.   :2683   Max.   : 340.00   Max.   :2775  
##   PT08.S5(O3)           T                  RH                AH           
##  Min.   :-200.0   Min.   :-200.000   Min.   :-200.00   Min.   :-200.0000  
##  1st Qu.: 700.0   1st Qu.:  10.900   1st Qu.:  34.10   1st Qu.:   0.6923  
##  Median : 942.0   Median :  17.200   Median :  48.60   Median :   0.9768  
##  Mean   : 975.1   Mean   :   9.778   Mean   :  39.49   Mean   :  -6.8376  
##  3rd Qu.:1255.0   3rd Qu.:  24.100   3rd Qu.:  61.90   3rd Qu.:   1.2962  
##  Max.   :2523.0   Max.   :  44.600   Max.   :  88.70   Max.   :   2.2310  
##      CO_GT        
##  Min.   :-200.00  
##  1st Qu.:   0.60  
##  Median :   1.50  
##  Mean   : -34.21  
##  3rd Qu.:   2.60  
##  Max.   :  11.90
# Mean of each measurement column (columns 3 to 15) per Date; the first 10
# groups are shown.
head(
  aggregate(
    x = myGitHubDataFrame[, 3:15],
    by = list(myGitHubDataFrame$Date),
    FUN = mean
  ),
  n = 10
)
##       Group.1 PT08.S1(CO)   NMHC(GT)  C6H6(GT) PT08.S2(NMHC)  NOx(GT)
## 1  2004-03-10    1316.500   86.50000  8.450000      912.3333 132.0000
## 2  2004-03-11    1244.167  104.50000  7.979167      851.9583 130.0417
## 3  2004-03-12    1281.667  141.50000 12.129167     1008.2917 142.5833
## 4  2004-03-13    1330.667  139.25000 10.916667      992.8333 168.4167
## 5  2004-03-14    1361.125  116.95833  9.637500      943.9167 132.1667
## 6  2004-03-15    1452.167  236.62500 16.091667     1138.9167 197.8333
## 7  2004-03-16    1339.667  191.12500 13.500000     1061.9167 165.7083
## 8  2004-03-17    1362.833  207.45833 15.237500     1099.0833 183.2917
## 9  2004-03-18    1352.667  -62.83333 13.795833     1072.5833 160.8333
## 10 2004-03-19    1304.292 -200.00000 13.520833     1058.0000 164.2500
##    PT08.S3(NOx)   NO2(GT) PT08.S4(NO2) PT08.S5(O3)        T       RH
## 1     1167.3333 108.83333     1545.500    1096.000 12.03333 54.90000
## 2     1277.2500  87.37500     1522.833     885.250  9.83750 64.07500
## 3     1101.8750  89.91667     1627.292    1084.375 11.28750 51.09583
## 4      993.2083 105.58333     1595.792    1245.917 12.86667 51.53333
## 5     1001.2917  97.45833     1602.375    1234.208 16.01250 48.85000
## 6      845.6250 105.45833     1888.625    1439.083 17.36667 50.32083
## 7      939.2917 104.91667     1726.625    1322.625 18.62500 44.83750
## 8      930.6667 106.62500     1741.208    1405.875 18.77917 41.16667
## 9      901.1667 111.58333     1702.833    1301.000 17.76250 44.08750
## 10     913.3750 104.83333     1745.417    1095.792 14.19167 57.90417
##           AH      CO_GT
## 1  0.7656333   1.966667
## 2  0.7757667  -6.187500
## 3  0.6631042 -14.095833
## 4  0.7322958  -5.750000
## 5  0.8496708  -5.966667
## 6  0.9440292  -4.975000
## 7  0.8710208  -5.679167
## 8  0.8049167  -5.387500
## 9  0.8260167  -5.745833
## 10 0.9239583  -5.491667
# Median of each measurement column (columns 3 to 15) per Date; the first 10
# groups are shown.
head(
  aggregate(
    x = myGitHubDataFrame[, 3:15],
    by = list(myGitHubDataFrame$Date),
    FUN = median
  ),
  n = 10
)
##       Group.1 PT08.S1(CO) NMHC(GT) C6H6(GT) PT08.S2(NMHC) NOx(GT)
## 1  2004-03-10      1326.0     84.0     9.10         943.5   131.0
## 2  2004-03-11      1234.5     62.0     5.75         800.5   108.0
## 3  2004-03-12      1280.5    152.5    12.20        1057.0   161.5
## 4  2004-03-13      1327.0    119.0    10.75        1005.0   162.5
## 5  2004-03-14      1346.5     99.5     8.90         936.5   145.0
## 6  2004-03-15      1408.5    199.0    14.45        1130.5   165.0
## 7  2004-03-16      1398.0    209.5    15.80        1173.0   182.5
## 8  2004-03-17      1391.0    170.0    14.35        1129.0   182.0
## 9  2004-03-18      1379.0   -200.0    13.05        1085.0   151.5
## 10 2004-03-19      1334.0   -200.0    15.85        1174.0   195.0
##    PT08.S3(NOx) NO2(GT) PT08.S4(NO2) PT08.S5(O3)     T    RH      AH CO_GT
## 1        1157.0   113.5       1557.0      1092.0 11.55 56.60 0.77130  2.10
## 2        1259.5    96.5       1423.5       810.0 10.15 60.70 0.76255  1.65
## 3         961.5   119.5       1633.5      1086.5 11.55 54.75 0.64685  2.60
## 4         969.5   121.5       1601.0      1168.0 14.65 48.95 0.71655  2.65
## 5         947.5   110.5       1612.5      1243.5 15.50 51.25 0.85345  2.30
## 6         847.5   110.5       1803.5      1257.5 16.20 54.50 0.92000  2.85
## 7         856.5   128.5       1749.0      1362.5 17.85 44.40 0.87245  2.90
## 8         896.0   125.0       1695.5      1412.0 17.45 43.35 0.82595  2.60
## 9         888.0   127.0       1661.5      1254.0 15.40 49.50 0.82145  2.25
## 10        782.0   131.0       1827.5      1219.0 14.35 57.35 0.91310  3.15
# Quantiles (0%, 25%, 50%, 75%, 100%) of each measurement column (columns 3 to
# 15) per Date; the first 10 groups are shown.
head(
  aggregate(
    x = myGitHubDataFrame[, 3:15],
    by = list(myGitHubDataFrame$Date),
    FUN = quantile
  ),
  n = 10
)
##       Group.1 PT08.S1(CO).0% PT08.S1(CO).25% PT08.S1(CO).50%
## 1  2004-03-10        1197.00         1277.00         1326.00
## 2  2004-03-11         913.00         1087.00         1234.50
## 3  2004-03-12         831.00         1071.00         1280.50
## 4  2004-03-13         978.00         1193.00         1327.00
## 5  2004-03-14        1028.00         1228.00         1346.50
## 6  2004-03-15        1075.00         1235.25         1408.50
## 7  2004-03-16         929.00         1146.75         1398.00
## 8  2004-03-17         869.00         1147.75         1391.00
## 9  2004-03-18         931.00         1164.75         1379.00
## 10 2004-03-19         913.00         1164.75         1334.00
##    PT08.S1(CO).75% PT08.S1(CO).100% NMHC(GT).0% NMHC(GT).25% NMHC(GT).50%
## 1          1372.00          1402.00       38.00        58.25        84.00
## 2          1337.50          1776.00        8.00        28.25        62.00
## 3          1513.75          1843.00     -200.00        38.25       152.50
## 4          1448.00          1621.00       27.00        64.25       119.00
## 5          1463.00          1898.00       27.00        73.50        99.50
## 6          1639.00          2040.00       39.00        66.75       199.00
## 7          1539.50          1800.00       17.00        49.75       209.50
## 8          1530.75          1975.00       11.00        74.75       170.00
## 9          1500.25          1934.00     -200.00      -200.00      -200.00
## 10         1448.25          1819.00     -200.00      -200.00      -200.00
##    NMHC(GT).75% NMHC(GT).100% C6H6(GT).0% C6H6(GT).25% C6H6(GT).50%
## 1        106.00        150.00       4.700        7.125        9.100
## 2        108.75        461.00       1.100        3.050        5.750
## 3        209.75        488.00       1.000        5.525       12.200
## 4        195.75        284.00       2.600        7.100       10.750
## 5        145.75        341.00       2.400        7.225        8.900
## 6        334.50        685.00       3.900        6.975       14.450
## 7        283.50        437.00       2.000        5.325       15.800
## 8        287.00        577.00       1.600        6.975       14.350
## 9         42.50        506.00       2.500        8.000       13.050
## 10      -200.00       -200.00       1.300        6.675       15.850
##    C6H6(GT).75% C6H6(GT).100% PT08.S2(NMHC).0% PT08.S2(NMHC).25%
## 1         9.350        11.900           750.00            861.75
## 2         9.925        27.400           512.00            657.50
## 3        17.975        32.600           501.00            786.25
## 4        14.275        19.600           625.00            860.00
## 5        11.900        23.100           615.00            866.25
## 6        21.550        39.200           703.00            854.50
## 7        19.250        25.100           585.00            780.50
## 8        21.325        38.400           554.00            854.50
## 9        17.925        35.800           623.00            899.25
## 10       19.500        31.300           525.00            843.75
##    PT08.S2(NMHC).50% PT08.S2(NMHC).75% PT08.S2(NMHC).100% NOx(GT).0%
## 1             943.50            953.25            1046.00      89.00
## 2             800.50            975.00            1488.00    -200.00
## 3            1057.00           1238.50            1610.00    -200.00
## 4            1005.00           1126.00            1286.00    -200.00
## 5             936.50           1045.00            1381.00    -200.00
## 6            1130.50           1337.75            1754.00    -200.00
## 7            1173.00           1275.00            1431.00    -200.00
## 8            1129.00           1331.75            1737.00    -200.00
## 9            1085.00           1237.00            1682.00    -200.00
## 10           1174.00           1283.00            1582.00    -200.00
##    NOx(GT).25% NOx(GT).50% NOx(GT).75% NOx(GT).100% PT08.S3(NOx).0%
## 1       110.00      131.00      157.25       172.00         1056.00
## 2        58.25      108.00      196.50       383.00          702.00
## 3       102.25      161.50      208.50       340.00          624.00
## 4       118.75      162.50      251.50       296.00          754.00
## 5       102.50      145.00      174.50       325.00          681.00
## 6       107.50      165.00      315.25       478.00          537.00
## 7        94.75      182.50      234.75       396.00          628.00
## 8       122.75      182.00      284.75       411.00          553.00
## 9        94.25      151.50      216.50       421.00          541.00
## 10      112.00      195.00      249.00       357.00          575.00
##    PT08.S3(NOx).25% PT08.S3(NOx).50% PT08.S3(NOx).75% PT08.S3(NOx).100%
## 1           1104.00          1157.00          1197.25           1337.00
## 2           1061.25          1259.50          1508.75           1918.00
## 3            834.50           961.50          1287.25           1895.00
## 4            840.75           969.50          1099.00           1420.00
## 5            902.50           947.50          1089.00           1395.00
## 6            671.50           847.50          1000.50           1156.00
## 7            772.25           856.50          1114.00           1412.00
## 8            724.00           896.00          1045.00           1460.00
## 9            788.00           888.00           962.25           1293.00
## 10           703.25           782.00          1029.25           1620.00
##    NO2(GT).0% NO2(GT).25% NO2(GT).50% NO2(GT).75% NO2(GT).100%
## 1       92.00      100.25      113.50      115.50       122.00
## 2     -200.00       72.00       96.50      128.75       172.00
## 3     -200.00       95.50      119.50      139.50       170.00
## 4     -200.00       92.75      121.50      144.25       165.00
## 5     -200.00       83.75      110.50      122.75       173.00
## 6     -200.00       92.00      110.50      146.00       187.00
## 7     -200.00       83.50      128.50      144.75       177.00
## 8     -200.00       88.50      125.00      137.50       194.00
## 9     -200.00       96.25      127.00      150.25       176.00
## 10    -200.00       98.25      131.00      144.50       166.00
##    PT08.S4(NO2).0% PT08.S4(NO2).25% PT08.S4(NO2).50% PT08.S4(NO2).75%
## 1          1393.00          1506.25          1557.00          1577.75
## 2          1182.00          1318.75          1423.50          1662.00
## 3          1134.00          1371.50          1633.50          1860.25
## 4          1268.00          1450.25          1601.00          1755.25
## 5          1333.00          1505.75          1612.50          1668.75
## 6          1464.00          1561.25          1803.50          2109.00
## 7          1348.00          1479.00          1749.00          1891.00
## 8          1268.00          1500.50          1695.50          1985.50
## 9          1307.00          1534.25          1661.50          1814.50
## 10         1260.00          1529.50          1827.50          1955.50
##    PT08.S4(NO2).100% PT08.S5(O3).0% PT08.S5(O3).25% PT08.S5(O3).50%
## 1            1692.00         949.00          997.50         1092.00
## 2            2333.00         422.00          653.00          810.00
## 3            2390.00         384.00          799.00         1086.50
## 4            1922.00         819.00          995.00         1168.00
## 5            2103.00         853.00          974.75         1243.50
## 6            2679.00        1010.00         1146.00         1257.50
## 7            2211.00         793.00         1012.50         1362.50
## 8            2535.00         667.00         1050.25         1412.00
## 9            2468.00         828.00         1100.50         1254.00
## 10           2456.00         370.00          863.75         1219.00
##    PT08.S5(O3).75% PT08.S5(O3).100%   T.0%  T.25%  T.50%  T.75% T.100%
## 1          1179.75          1268.00 11.000 11.200 11.550 12.950 13.600
## 2          1053.25          1704.00  8.000  9.400 10.150 10.550 11.300
## 3          1419.50          1887.00  6.100  7.250 11.550 15.025 16.900
## 4          1482.75          1886.00  6.300  8.675 14.650 16.250 19.400
## 5          1372.00          1905.00 10.400 12.325 15.500 19.400 22.200
## 6          1729.75          2184.00 11.300 13.200 16.200 22.125 24.400
## 7          1526.00          2034.00 11.000 13.600 17.850 24.025 28.200
## 8          1710.50          2359.00  9.900 13.850 17.450 24.625 29.300
## 9          1555.25          2051.00 10.600 13.700 15.400 23.200 27.100
## 10         1405.75          1716.00 11.900 12.500 14.350 15.575 16.400
##     RH.0% RH.25% RH.50% RH.75% RH.100%    AH.0%   AH.25%   AH.50%   AH.75%
## 1  47.700 50.175 56.600 59.500  60.000 0.725500 0.752100 0.771300 0.786225
## 2  56.200 59.300 60.700 67.650  81.100 0.665700 0.741475 0.762550 0.814600
## 3  34.300 38.100 54.750 62.450  65.900 0.619500 0.626175 0.646850 0.668425
## 4  31.300 40.200 48.950 63.950  71.900 0.688700 0.703450 0.716550 0.748050
## 5  28.400 37.900 51.250 60.275  67.600 0.751600 0.843150 0.853450 0.872900
## 6  28.900 35.075 54.500 61.925  70.500 0.873600 0.905525 0.920000 0.949300
## 7  18.600 27.075 44.400 62.375  68.500 0.701400 0.825575 0.872450 0.929775
## 8  14.900 26.350 43.350 56.050  65.200 0.523700 0.795750 0.825950 0.872700
## 9  17.800 28.575 49.500 58.075  64.200 0.627500 0.800825 0.821450 0.855775
## 10 47.100 50.525 57.350 66.425  71.100 0.864000 0.894625 0.913100 0.958475
##     AH.100% CO_GT.0% CO_GT.25% CO_GT.50% CO_GT.75% CO_GT.100%
## 1  0.788800    1.200     1.700     2.100     2.200      2.600
## 2  0.877800 -200.000     1.000     1.650     2.375      6.900
## 3  0.777100 -200.000     1.400     2.600     3.550      6.600
## 4  0.819300 -200.000     1.850     2.650     3.300      4.200
## 5  0.934100 -200.000     1.750     2.300     2.825      5.900
## 6  1.094500 -200.000     1.800     2.850     4.525      8.100
## 7  1.049400 -200.000     1.275     2.900     3.725      5.300
## 8  0.929400 -200.000     1.500     2.600     4.150      7.600
## 9  0.960600 -200.000     1.475     2.250     3.325      6.600
## 10 1.002900 -200.000     1.575     3.150     4.000      6.200

Conclusion

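As can be seen from the monthly mean values of CO(GT), air quality is worst in October 2004 and best in February 2005 for the given sample of data. Note, however, that every numeric column has a minimum of -200, which appears to be a placeholder for missing readings; because these values are included in the means, a strongly negative mean may reflect a larger number of missing readings rather than a difference in actual CO levels.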

Data wrangling

Please perform some basic transformations. They will need to make sense but could include column renaming, creating a subset of the data, replacing values, or creating new columns with derived data (for example - if it makes sense you could sum two columns together)

# Add calendar helper columns derived from Date: month number, year, and
# JulianDay (the day count returned by julian() for the calendar date).
myGitHubDataFrame <- mutate(
  myGitHubDataFrame,
  month = month(Date),
  year = year(Date),
  JulianDay = julian(as.Date(as.character(as.POSIXct(Date))))
)

# Average CO_GT for each (month, year) combination.
# NOTE(review): CO_GT contains -200 values (see the summary() minimum), which
# look like a missing-value marker; they are averaged in as-is here -- TODO
# confirm and consider excluding them.
myGitHubDataFrame_month <- summarise(
  group_by(myGitHubDataFrame, month, year),
  mean_CO_GT_month = mean(CO_GT)
)
myGitHubDataFrame_month
## # A tibble: 14 x 3
## # Groups:   month [?]
##    month  year mean_CO_GT_month
##    <dbl> <dbl>            <dbl>
##  1     1  2005          -16.3  
##  2     2  2005           -0.610
##  3     3  2004           -4.85 
##  4     3  2005           -3.39 
##  5     4  2004          -60.9  
##  6     4  2005           -3.42 
##  7     5  2004          -39.3  
##  8     6  2004          -19.4  
##  9     7  2004          -48.7  
## 10     8  2004          -71.0  
## 11     9  2004          -43.9  
## 12    10  2004          -92.9  
## 13    11  2004           -8.86 
## 14    12  2004          -25.6
# Average CO_GT per Date, then add year, month and a Date-class `day` column
# converted from the date-time Date column.
myGitHubDataFrame_day <- myGitHubDataFrame %>%
  group_by(Date) %>%
  summarise(mean_CO_GT_day = mean(CO_GT)) %>%
  mutate(
    year = year(Date),
    month = month(Date),
    day = as.Date(as.character(as.POSIXct(Date)))
  )
head(myGitHubDataFrame_day, n = 10)
## # A tibble: 10 x 5
##    Date                mean_CO_GT_day  year month day       
##    <dttm>                       <dbl> <dbl> <dbl> <date>    
##  1 2004-03-10 00:00:00           1.97  2004     3 2004-03-10
##  2 2004-03-11 00:00:00          -6.19  2004     3 2004-03-11
##  3 2004-03-12 00:00:00         -14.1   2004     3 2004-03-12
##  4 2004-03-13 00:00:00          -5.75  2004     3 2004-03-13
##  5 2004-03-14 00:00:00          -5.97  2004     3 2004-03-14
##  6 2004-03-15 00:00:00          -4.97  2004     3 2004-03-15
##  7 2004-03-16 00:00:00          -5.68  2004     3 2004-03-16
##  8 2004-03-17 00:00:00          -5.39  2004     3 2004-03-17
##  9 2004-03-18 00:00:00          -5.75  2004     3 2004-03-18
## 10 2004-03-19 00:00:00          -5.49  2004     3 2004-03-19
# Extend the daily table with JulianDay, the julian() day count of `day`.
myGitHubDataFrame_JulianDay <- mutate(
  myGitHubDataFrame_day,
  JulianDay = julian(day)
)
head(myGitHubDataFrame_JulianDay, n = 10)
## # A tibble: 10 x 6
##    Date                mean_CO_GT_day  year month day        JulianDay
##    <dttm>                       <dbl> <dbl> <dbl> <date>         <dbl>
##  1 2004-03-10 00:00:00           1.97  2004     3 2004-03-10     12487
##  2 2004-03-11 00:00:00          -6.19  2004     3 2004-03-11     12488
##  3 2004-03-12 00:00:00         -14.1   2004     3 2004-03-12     12489
##  4 2004-03-13 00:00:00          -5.75  2004     3 2004-03-13     12490
##  5 2004-03-14 00:00:00          -5.97  2004     3 2004-03-14     12491
##  6 2004-03-15 00:00:00          -4.97  2004     3 2004-03-15     12492
##  7 2004-03-16 00:00:00          -5.68  2004     3 2004-03-16     12493
##  8 2004-03-17 00:00:00          -5.39  2004     3 2004-03-17     12494
##  9 2004-03-18 00:00:00          -5.75  2004     3 2004-03-18     12495
## 10 2004-03-19 00:00:00          -5.49  2004     3 2004-03-19     12496

Graphics

Please make sure to display at least one scatter plot, box plot and histogram. Don’t be limited to this. Please explore the many other options in R packages such as ggplot2.

# Scatter plot of every CO_GT reading against its Date.
myGitHubDataFrame %>%
  ggplot(aes(x = Date, y = CO_GT)) +
  geom_point(color = "darkorchid4") +
  theme_bw(base_size = 20) +
  labs(
    title = "Air Quality CO Daily",
    subtitle = "The data frame is sent to the plot using pipes",
    x = "Date",
    y = "Daily CO values"
  )

# Scatter plot of the mean CO_GT of each (month, year) group against the
# month number.
myGitHubDataFrame_month %>%
  ggplot(aes(x = month, y = mean_CO_GT_month)) +
  geom_point(color = "darkorchid4") +
  theme_bw(base_size = 20) +
  labs(
    title = "Air Quality CO Monthly",
    subtitle = "The data frame is sent to the plot using pipes",
    x = "Month",
    y = "Monthly CO values"
  )

# Daily mean CO_GT over time, one panel per year. Rows containing NA are
# dropped before plotting.
myGitHubDataFrame_day %>%
  na.omit() %>%
  ggplot(aes(x = day, y = mean_CO_GT_day)) +
  geom_point(color = "darkorchid4") +
  facet_wrap(~ year) +
  labs(
    title = "Air Quality CO",
    subtitle = "Use facets to plot by a variable - year in this case",
    y = "Daily CO values",
    x = "Date"
  ) +
  theme_bw(base_size = 20) +
  # The data spans roughly 13 months (2004-03-10 to 2005-04-04), so a
  # "5 years" break interval leaves the axis with at most one tick; half-year
  # breaks keep the month-year labels readable at this font size.
  scale_x_date(date_breaks = "6 months", date_labels = "%m-%Y")

# Daily mean CO_GT against JulianDay, one panel per year.
myGitHubDataFrame_JulianDay %>%
  ggplot(aes(x = JulianDay, y = mean_CO_GT_day)) +
  geom_point(color = "darkorchid4") +
  facet_wrap(~ year, ncol = 3) +
  labs(
    title = "Air Quality CO",
    subtitle = "Data plotted by year",
    y = "Daily CO values",
    # JulianDay is the julian() day count (e.g. 12487 for 2004-03-10, i.e.
    # days since 1970-01-01), not a 1-366 day of the year, so the axis is
    # labelled as a Julian date like the other JulianDay plots.
    x = "Julian Date"
  ) +
  theme_bw(base_size = 15)

# Bar chart of daily mean CO_GT for JulianDay 12487 through 12783
# (origin date = "1970-01-01"), one panel per year.
myGitHubDataFrame_JulianDay %>%
  filter(JulianDay >= 12487, JulianDay <= 12783) %>%
  ggplot(aes(x = JulianDay, y = mean_CO_GT_day)) +
  geom_bar(stat = "identity", fill = "darkorchid4") +
  facet_wrap(~ year, ncol = 3) +
  theme_bw(base_size = 15) +
  labs(
    title = "Air Quality CO",
    subtitle = "Data plotted by year",
    x = "Julian Date",
    y = "Daily CO values by year"
  )

# Bar chart of daily mean CO_GT against JulianDay, one panel per month number.
ggplot(
  myGitHubDataFrame_JulianDay,
  aes(x = JulianDay, y = mean_CO_GT_day)
) +
  geom_bar(stat = "identity", fill = "darkorchid4") +
  facet_wrap(~ month, ncol = 3) +
  theme_bw(base_size = 15) +
  labs(
    title = "Air Quality CO",
    subtitle = "Data plotted by Month",
    x = "Julian Date",
    y = "Daily CO values by month"
  )

# Bar chart of the monthly mean CO_GT, one panel per year.
ggplot(
  myGitHubDataFrame_month,
  aes(x = month, y = mean_CO_GT_month)
) +
  geom_bar(stat = "identity", fill = "darkorchid4") +
  facet_wrap(~ year, ncol = 3) +
  theme_bw(base_size = 15) +
  labs(
    title = "Monthly Air Quality CO",
    subtitle = "Data plotted by Year",
    x = "Month",
    y = "Monthly CO values"
  )

# Bar chart of daily mean CO_GT for JulianDay 12692 through 12722
# (origin date = "1970-01-01"), i.e. the month of October 2004.
myGitHubDataFrame_JulianDay %>%
  filter(JulianDay >= 12692, JulianDay <= 12722) %>%
  ggplot(aes(x = JulianDay, y = mean_CO_GT_day)) +
  geom_bar(stat = "identity", fill = "darkorchid4") +
  facet_wrap(~ month, ncol = 3) +
  theme_bw(base_size = 15) +
  labs(
    title = "Daily Air Quality CO",
    subtitle = "Data plotted for October 2004 by Day",
    x = "Julian Date",
    y = "Daily CO values by month"
  )

# Hourly CO_GT readings for the selected October 2004 days, one panel per
# JulianDay. The two JulianDay ranges correspond to 10/02/2004-10/06/2004 and
# 10/14/2004-10/19/2004.
myGitHubDataFrame %>%
  filter(
    (JulianDay >= 12693 & JulianDay <= 12697) |
      (JulianDay >= 12705 & JulianDay <= 12710)
  ) %>%
  ggplot(aes(x = Time, y = CO_GT)) +
  geom_bar(stat = "identity", fill = "darkorchid4") +
  facet_wrap(~ JulianDay, ncol = 3) +
  labs(
    title = "Hourly Air Quality CO",
    subtitle = "Data plotted for October 2004 by Time",
    # The x axis is the Time column and each bar is a single CO_GT reading,
    # so the axes are labelled as time of day and hourly values.
    y = "Hourly CO values",
    x = "Time of day"
  ) +
  theme_bw(base_size = 5)

Meaningful question for analysis

Please state at the beginning a meaningful question for analysis. Use the first three steps and anything else that would be helpful to answer the question you are posing from the data set you chose. Please write a brief conclusion paragraph in R markdown at the end.

What is the worst month in terms of air quality for CO?

As can be inferred from the monthly bar chart plotted by year, October 2004 had the worst air quality.

On which days of the worst month is the air quality for CO consistently bad day-on-day?

As can be inferred from the daily bar chart for October 2004, days 2-6 and 14-19 were the worst days of that month.

During which time period of the day is the air quality for CO consistently bad day-on-day in the worst month?

As can be inferred from the hourly bar chart for the worst days in October 2004, the air quality for CO appears to be at its worst throughout all 24 hours of the day rather than during any particular time period.