Import libraries

library(dplyr)
library(tidyr)
library(qcc) # for pareto-analysis

Analysis of DATA 1

I got the first data from the student: Sean Amato. I saved that on my github after I downloaded that on Slack.

Collect the data

df1 = read.csv('https://raw.githubusercontent.com/Kossi-Akplaka/Data607-data_acquisition_and_management/main/Project%202/Data1-Manufacturing.csv', header = TRUE)
df1
##   Work.Orders                 X Work.Order.Status        X.1      X.2       X.3
## 1    Building              Type          NOASSIGN WORKASSIGN WORKPLAN WORKSCHED
## 2          B7  Prev Maintenance                16         44       47        93
## 3             Corr Maintenacnce                20          7        8         4
## 4         B23  Prev Maintenance                75         47       24        77
## 5             Corr Maintenacnce                 8         20        8         3
## 6         B30  Prev Maintenance                59         10       15        54
## 7             Corr Maintenacnce                15         11       18         4
##      X.4  X.5
## 1 INPROG COMP
## 2     46 2367
## 3     16  326
## 4     41 1482
## 5     18  526
## 6     51 2345
## 7      8  123

Tidy the data

First we can remove the header of the data frame and make the second column as a header

colnames(df1) <- as.character(unlist(df1[1, ]))
df1 <- df1[-1, ] 
df1
##   Building              Type NOASSIGN WORKASSIGN WORKPLAN WORKSCHED INPROG COMP
## 2       B7  Prev Maintenance       16         44       47        93     46 2367
## 3          Corr Maintenacnce       20          7        8         4     16  326
## 4      B23  Prev Maintenance       75         47       24        77     41 1482
## 5          Corr Maintenacnce        8         20        8         3     18  526
## 6      B30  Prev Maintenance       59         10       15        54     51 2345
## 7          Corr Maintenacnce       15         11       18         4      8  123

Now, we can fill up the missing value for Building

df1[2,1] = 'B7'
df1[4,1] = 'B23'
df1[6,1] = 'B30'

df1
##   Building              Type NOASSIGN WORKASSIGN WORKPLAN WORKSCHED INPROG COMP
## 2       B7  Prev Maintenance       16         44       47        93     46 2367
## 3       B7 Corr Maintenacnce       20          7        8         4     16  326
## 4      B23  Prev Maintenance       75         47       24        77     41 1482
## 5      B23 Corr Maintenacnce        8         20        8         3     18  526
## 6      B30  Prev Maintenance       59         10       15        54     51 2345
## 7      B30 Corr Maintenacnce       15         11       18         4      8  123

Now, we can transform the data from wide to long format.

colnames(df1)
## [1] "Building"   "Type"       "NOASSIGN"   "WORKASSIGN" "WORKPLAN"  
## [6] "WORKSCHED"  "INPROG"     "COMP"
df1_long <- df1 %>% 
  pivot_longer(cols = c( "NOASSIGN",  "WORKASSIGN","WORKPLAN",
                         "WORKSCHED"  ,"INPROG"  ,   "COMP"),
               names_to = "Order_Status",
               values_to = "Count")

head(df1_long, 7)
## # A tibble: 7 × 4
##   Building Type              Order_Status Count
##   <chr>    <chr>             <chr>        <chr>
## 1 B7       Prev Maintenance  NOASSIGN     16   
## 2 B7       Prev Maintenance  WORKASSIGN   44   
## 3 B7       Prev Maintenance  WORKPLAN     47   
## 4 B7       Prev Maintenance  WORKSCHED    93   
## 5 B7       Prev Maintenance  INPROG       46   
## 6 B7       Prev Maintenance  COMP         2367 
## 7 B7       Corr Maintenacnce NOASSIGN     20

Data analysis

We have a tidy data ready for analysis. We can plot a Pareto chart. you can find more detail how to create a Pareto chart in this article

df1_long$Count <- as.numeric(df1_long$Count)
pareto.chart(df1_long$Count)

##     
## Pareto chart analysis for df1_long$Count
##         Frequency    Cum.Freq.   Percentage Cum.Percent.
##   F  2.367000e+03 2.367000e+03 2.945495e+01 2.945495e+01
##   D1 2.345000e+03 4.712000e+03 2.918118e+01 5.863614e+01
##   R  1.482000e+03 6.194000e+03 1.844201e+01 7.707815e+01
##   X  5.260000e+02 6.720000e+03 6.545545e+00 8.362369e+01
##   L  3.260000e+02 7.046000e+03 4.056745e+00 8.768044e+01
##   J1 1.230000e+02 7.169000e+03 1.530612e+00 8.921105e+01
##   D  9.300000e+01 7.262000e+03 1.157292e+00 9.036834e+01
##   P  7.700000e+01 7.339000e+03 9.581882e-01 9.132653e+01
##   M  7.500000e+01 7.414000e+03 9.333001e-01 9.225983e+01
##   Y  5.900000e+01 7.473000e+03 7.341961e-01 9.299403e+01
##   B1 5.400000e+01 7.527000e+03 6.719761e-01 9.366600e+01
##   C1 5.100000e+01 7.578000e+03 6.346441e-01 9.430065e+01
##   C  4.700000e+01 7.625000e+03 5.848681e-01 9.488552e+01
##   N  4.700000e+01 7.672000e+03 5.848681e-01 9.547038e+01
##   E  4.600000e+01 7.718000e+03 5.724241e-01 9.604281e+01
##   B  4.400000e+01 7.762000e+03 5.475361e-01 9.659034e+01
##   Q  4.100000e+01 7.803000e+03 5.102041e-01 9.710055e+01
##   O  2.400000e+01 7.827000e+03 2.986560e-01 9.739920e+01
##   G  2.000000e+01 7.847000e+03 2.488800e-01 9.764808e+01
##   T  2.000000e+01 7.867000e+03 2.488800e-01 9.789696e+01
##   W  1.800000e+01 7.885000e+03 2.239920e-01 9.812096e+01
##   G1 1.800000e+01 7.903000e+03 2.239920e-01 9.834495e+01
##   A  1.600000e+01 7.919000e+03 1.991040e-01 9.854405e+01
##   K  1.600000e+01 7.935000e+03 1.991040e-01 9.874316e+01
##   A1 1.500000e+01 7.950000e+03 1.866600e-01 9.892982e+01
##   E1 1.500000e+01 7.965000e+03 1.866600e-01 9.911648e+01
##   F1 1.100000e+01 7.976000e+03 1.368840e-01 9.925336e+01
##   Z  1.000000e+01 7.986000e+03 1.244400e-01 9.937780e+01
##   I  8.000000e+00 7.994000e+03 9.955202e-02 9.947735e+01
##   S  8.000000e+00 8.002000e+03 9.955202e-02 9.957690e+01
##   U  8.000000e+00 8.010000e+03 9.955202e-02 9.967646e+01
##   I1 8.000000e+00 8.018000e+03 9.955202e-02 9.977601e+01
##   H  7.000000e+00 8.025000e+03 8.710801e-02 9.986312e+01
##   J  4.000000e+00 8.029000e+03 4.977601e-02 9.991289e+01
##   H1 4.000000e+00 8.033000e+03 4.977601e-02 9.996267e+01
##   V  3.000000e+00 8.036000e+03 3.733201e-02 1.000000e+02
LS0tDQp0aXRsZTogIlByb2plY3QgMi0gZGF0YTEiDQphdXRob3I6ICJLb3NzaSBBa3BsYWthIg0KZGF0ZTogImByIFN5cy5EYXRlKClgIg0Kb3V0cHV0OiBvcGVuaW50cm86OmxhYl9yZXBvcnQNCi0tLQ0KDQojIyBJbXBvcnQgbGlicmFyaWVzDQpgYGB7ciBpbXBvcnQtbGlicmFyaWVzLCBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFfQ0KbGlicmFyeShkcGx5cikNCmxpYnJhcnkodGlkeXIpDQpsaWJyYXJ5KHFjYykgIyBmb3IgcGFyZXRvLWFuYWx5c2lzDQpgYGANCg0KDQojIyBBbmFseXNpcyBvZiBEQVRBIDENCkkgZ290IHRoZSBmaXJzdCBkYXRhIGZyb20gdGhlIHN0dWRlbnQ6IFNlYW4gQW1hdG8uIEkgc2F2ZWQgdGhhdCBvbiBteSBnaXRodWIgIGFmdGVyIEkgZG93bmxvYWRlZCB0aGF0IG9uIFNsYWNrLg0KDQojIyMgQ29sbGVjdCB0aGUgZGF0YQ0KYGBge3IgY29sbGVjdC1kYXRhfQ0KZGYxID0gcmVhZC5jc3YoJ2h0dHBzOi8vcmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbS9Lb3NzaS1Ba3BsYWthL0RhdGE2MDctZGF0YV9hY3F1aXNpdGlvbl9hbmRfbWFuYWdlbWVudC9tYWluL1Byb2plY3QlMjAyL0RhdGExLU1hbnVmYWN0dXJpbmcuY3N2JywgaGVhZGVyID0gVFJVRSkNCmRmMQ0KYGBgDQoNCiMjIyBUaWR5IHRoZSBkYXRhDQpGaXJzdCB3ZSBjYW4gcmVtb3ZlIHRoZSBoZWFkZXIgb2YgdGhlIGRhdGEgZnJhbWUgYW5kIG1ha2UgdGhlIHNlY29uZCBjb2x1bW4gYXMgYSBoZWFkZXINCmBgYHtyIHJlbW92ZS1hZGQtaGVhZGVyfQ0KY29sbmFtZXMoZGYxKSA8LSBhcy5jaGFyYWN0ZXIodW5saXN0KGRmMVsxLCBdKSkNCmRmMSA8LSBkZjFbLTEsIF0gDQpkZjENCmBgYA0KTm93LCB3ZSBjYW4gZmlsbCB1cCB0aGUgbWlzc2luZyB2YWx1ZSBmb3IgQnVpbGRpbmcNCmBgYHtyIG1pc3NpbmctdmFsdWVzfQ0KZGYxWzIsMV0gPSAnQjcnDQpkZjFbNCwxXSA9ICdCMjMnDQpkZjFbNiwxXSA9ICdCMzAnDQoNCmRmMQ0KYGBgDQoNCk5vdywgd2UgY2FuIHRyYW5zZm9ybSB0aGUgZGF0YSBmcm9tIHdpZGUgdG8gbG9uZyBmb3JtYXQuDQoNCmBgYHtyIHdpZGUtdG8tbG9uZ30NCmNvbG5hbWVzKGRmMSkNCg0KZGYxX2xvbmcgPC0gZGYxICU+JSANCiAgcGl2b3RfbG9uZ2VyKGNvbHMgPSBjKCAiTk9BU1NJR04iLCAgIldPUktBU1NJR04iLCJXT1JLUExBTiIsDQogICAgICAgICAgICAgICAgICAgICAgICAgIldPUktTQ0hFRCIgICwiSU5QUk9HIiAgLCAgICJDT01QIiksDQogICAgICAgICAgICAgICBuYW1lc190byA9ICJPcmRlcl9TdGF0dXMiLA0KICAgICAgICAgICAgICAgdmFsdWVzX3RvID0gIkNvdW50IikNCg0KaGVhZChkZjFfbG9uZywgNykNCmBgYA0KDQojIyMgRGF0YSBhbmFseXNpcw0KV2UgaGF2ZSBhIHRpZHkgZGF0YSByZWFkeSBmb3IgYW5hbHlzaXMuIFdlIGNhbiBwbG90IGEgUGFyZXRvIGNoYXJ0LiANCnlvdSBjYW4gZmluZCBtb3JlIGRldGFpbCBob3cgdG8gY3JlYXRlIGEgUGFyZXRvIGNoYXJ0IGluIHRoaXMgW2FydGljbGVdKCJodHRwczovL3d3dy5zdGF0b2xvZ3kub3JnL3BhcmV0by1jaGFydC1pbi1yLyIpDQpgYGB7ciBwYXJldG8tY2hhcnR9DQpkZjFfbG9uZyRDb3VudCA8LSBhcy5udW1lcmljKGRmMV9sb25nJENvdW50KQ0KcGFyZXRvLmNoYXJ0KGRmMV9sb25nJENvdW50KQ0KYGBgDQoNCg0KDQo=