Create a dataset of continous responses attributed to four factors (f1, f2, f3 and f4)

set.seed(1)
f1 <- rnorm(50, mean = 40, sd = 7)
f2 <- rnorm(50, mean = 42, sd = 8)
f3 <- rnorm(50, mean = 43, sd = 9)
f4 <- rnorm(50, mean = 44, sd = 10)

factors.df <- data.frame(cbind(f1, f2, f3, f4))

factors.df
##          f1       f2       f3       f4
## 1  35.61482 45.18485 37.41670 48.50187
## 2  41.28550 37.10379 43.37904 43.81440
## 3  34.15060 44.72896 34.80171 40.81932
## 4  51.16697 32.96510 44.42226 34.70638
## 5  42.30655 53.46419 37.10874 29.12540
## 6  34.25672 57.84320 58.90559 33.24808
## 7  43.41200 39.06223 49.45037 54.00029
## 8  45.16827 33.64692 51.19157 37.78733
## 9  44.03047 46.55776 46.45767 30.15573
## 10 37.86228 40.91956 58.13958 62.69291
## 11 50.58247 61.21294 37.27837 48.25100
## 12 42.72890 41.68608 38.84520 41.61353
## 13 35.65132 47.51791 55.89054 54.58483
## 14 24.49710 42.22402 37.14373 52.86423
## 15 47.87452 36.05381 41.13357 37.80757
## 16 39.68546 43.51034 39.46473 66.06102
## 17 39.88667 27.56033 40.12006 41.44973
## 18 46.60685 53.72444 40.48798 29.75505
## 19 45.74855 43.22603 47.44769 42.55600
## 20 44.15731 59.38089 41.40403 46.07538
## 21 46.43284 45.80408 38.44638 67.07978
## 22 45.47495 36.32043 55.08735 45.05802
## 23 40.52195 46.88581 41.06879 48.56999
## 24 26.07454 34.52722 41.38399 43.22847
## 25 44.33878 31.97093 42.09828 40.65999
## 26 39.60710 44.33157 49.41400 43.65274
## 27 38.90943 38.45367 42.33792 51.87640
## 28 29.70473 42.00884 42.66129 64.75245
## 29 36.65295 42.59473 36.86506 54.27392
## 30 42.92559 37.28383 40.08157 56.07908
## 31 49.51076 37.45065 43.54144 31.68677
## 32 39.28049 40.91857 37.69995 53.83896
## 33 42.71370 51.42470 47.78347 46.19925
## 34 39.62336 29.81147 29.33445 29.32750
## 35 30.36058 46.75157 45.75902 49.21023
## 36 37.09504 44.66360 29.17195 42.41245
## 37 37.23997 50.50480 40.29121 58.64587
## 38 39.58481 39.56653 38.24548 36.33918
## 39 47.70018 44.96015 37.13115 39.69788
## 40 45.34223 44.13679 42.48793 34.73891
## 41 38.84833 37.65984 25.77077 42.22896
## 42 38.22647 51.66294 53.58925 48.02012
## 43 44.87874 51.28322 28.01525 36.68252
## 44 43.89664 47.60171 38.82823 52.30373
## 45 35.17871 54.69467 32.95672 31.91917
## 46 35.04753 46.46789 36.24263 33.52016
## 47 42.55207 31.78726 61.78450 58.41158
## 48 45.37973 37.41388 43.15656 33.84153
## 49 39.21358 32.20310 31.42330 48.11975
## 50 46.16775 38.21279 28.23455 40.18924

Summarize the data

summary(factors.df)
##        f1              f2              f3              f4       
##  Min.   :24.50   Min.   :27.56   Min.   :25.77   Min.   :29.13  
##  1st Qu.:37.40   1st Qu.:37.42   1st Qu.:37.18   1st Qu.:36.96  
##  Median :40.90   Median :42.91   Median :40.78   Median :43.44  
##  Mean   :40.70   Mean   :42.94   Mean   :41.63   Mean   :44.77  
##  3rd Qu.:45.10   3rd Qu.:46.85   3rd Qu.:45.42   3rd Qu.:52.20  
##  Max.   :51.17   Max.   :61.21   Max.   :61.78   Max.   :67.08

Boxplots for the continous responses

boxplot(f1, f2, f3, f4)

Histogram of responses to factor f1

hist(f1)

Histogram, Normal Curve, Kernel Density Lines and Rug/Lineplots for responses to factor f1

v1 <- factors.df$f1

h <- hist(v1,
          prob = TRUE,
          ylim = c(0, 0.12),
          xlim = c(20, 55),
          breaks = 11,
          col = "#E5E5E5",
          border = 0,
          main = "Histogram of variable v1")

curve(dnorm(x, mean = mean(v1), sd = sd(v1)), 
      col = "red", 
      lwd = 3,
      add = TRUE)

lines(density(v1), col = "blue")

lines(density(v1, adjust = 3), col = "darkgreen")

rug(v1, col = "red")

Long form of the data

stack.v <- stack(factors.df) 

stack.v
##       values ind
## 1   35.61482  f1
## 2   41.28550  f1
## 3   34.15060  f1
## 4   51.16697  f1
## 5   42.30655  f1
## 6   34.25672  f1
## 7   43.41200  f1
## 8   45.16827  f1
## 9   44.03047  f1
## 10  37.86228  f1
## 11  50.58247  f1
## 12  42.72890  f1
## 13  35.65132  f1
## 14  24.49710  f1
## 15  47.87452  f1
## 16  39.68546  f1
## 17  39.88667  f1
## 18  46.60685  f1
## 19  45.74855  f1
## 20  44.15731  f1
## 21  46.43284  f1
## 22  45.47495  f1
## 23  40.52195  f1
## 24  26.07454  f1
## 25  44.33878  f1
## 26  39.60710  f1
## 27  38.90943  f1
## 28  29.70473  f1
## 29  36.65295  f1
## 30  42.92559  f1
## 31  49.51076  f1
## 32  39.28049  f1
## 33  42.71370  f1
## 34  39.62336  f1
## 35  30.36058  f1
## 36  37.09504  f1
## 37  37.23997  f1
## 38  39.58481  f1
## 39  47.70018  f1
## 40  45.34223  f1
## 41  38.84833  f1
## 42  38.22647  f1
## 43  44.87874  f1
## 44  43.89664  f1
## 45  35.17871  f1
## 46  35.04753  f1
## 47  42.55207  f1
## 48  45.37973  f1
## 49  39.21358  f1
## 50  46.16775  f1
## 51  45.18485  f2
## 52  37.10379  f2
## 53  44.72896  f2
## 54  32.96510  f2
## 55  53.46419  f2
## 56  57.84320  f2
## 57  39.06223  f2
## 58  33.64692  f2
## 59  46.55776  f2
## 60  40.91956  f2
## 61  61.21294  f2
## 62  41.68608  f2
## 63  47.51791  f2
## 64  42.22402  f2
## 65  36.05381  f2
## 66  43.51034  f2
## 67  27.56033  f2
## 68  53.72444  f2
## 69  43.22603  f2
## 70  59.38089  f2
## 71  45.80408  f2
## 72  36.32043  f2
## 73  46.88581  f2
## 74  34.52722  f2
## 75  31.97093  f2
## 76  44.33157  f2
## 77  38.45367  f2
## 78  42.00884  f2
## 79  42.59473  f2
## 80  37.28383  f2
## 81  37.45065  f2
## 82  40.91857  f2
## 83  51.42470  f2
## 84  29.81147  f2
## 85  46.75157  f2
## 86  44.66360  f2
## 87  50.50480  f2
## 88  39.56653  f2
## 89  44.96015  f2
## 90  44.13679  f2
## 91  37.65984  f2
## 92  51.66294  f2
## 93  51.28322  f2
## 94  47.60171  f2
## 95  54.69467  f2
## 96  46.46789  f2
## 97  31.78726  f2
## 98  37.41388  f2
## 99  32.20310  f2
## 100 38.21279  f2
## 101 37.41670  f3
## 102 43.37904  f3
## 103 34.80171  f3
## 104 44.42226  f3
## 105 37.10874  f3
## 106 58.90559  f3
## 107 49.45037  f3
## 108 51.19157  f3
## 109 46.45767  f3
## 110 58.13958  f3
## 111 37.27837  f3
## 112 38.84520  f3
## 113 55.89054  f3
## 114 37.14373  f3
## 115 41.13357  f3
## 116 39.46473  f3
## 117 40.12006  f3
## 118 40.48798  f3
## 119 47.44769  f3
## 120 41.40403  f3
## 121 38.44638  f3
## 122 55.08735  f3
## 123 41.06879  f3
## 124 41.38399  f3
## 125 42.09828  f3
## 126 49.41400  f3
## 127 42.33792  f3
## 128 42.66129  f3
## 129 36.86506  f3
## 130 40.08157  f3
## 131 43.54144  f3
## 132 37.69995  f3
## 133 47.78347  f3
## 134 29.33445  f3
## 135 45.75902  f3
## 136 29.17195  f3
## 137 40.29121  f3
## 138 38.24548  f3
## 139 37.13115  f3
## 140 42.48793  f3
## 141 25.77077  f3
## 142 53.58925  f3
## 143 28.01525  f3
## 144 38.82823  f3
## 145 32.95672  f3
## 146 36.24263  f3
## 147 61.78450  f3
## 148 43.15656  f3
## 149 31.42330  f3
## 150 28.23455  f3
## 151 48.50187  f4
## 152 43.81440  f4
## 153 40.81932  f4
## 154 34.70638  f4
## 155 29.12540  f4
## 156 33.24808  f4
## 157 54.00029  f4
## 158 37.78733  f4
## 159 30.15573  f4
## 160 62.69291  f4
## 161 48.25100  f4
## 162 41.61353  f4
## 163 54.58483  f4
## 164 52.86423  f4
## 165 37.80757  f4
## 166 66.06102  f4
## 167 41.44973  f4
## 168 29.75505  f4
## 169 42.55600  f4
## 170 46.07538  f4
## 171 67.07978  f4
## 172 45.05802  f4
## 173 48.56999  f4
## 174 43.22847  f4
## 175 40.65999  f4
## 176 43.65274  f4
## 177 51.87640  f4
## 178 64.75245  f4
## 179 54.27392  f4
## 180 56.07908  f4
## 181 31.68677  f4
## 182 53.83896  f4
## 183 46.19925  f4
## 184 29.32750  f4
## 185 49.21023  f4
## 186 42.41245  f4
## 187 58.64587  f4
## 188 36.33918  f4
## 189 39.69788  f4
## 190 34.73891  f4
## 191 42.22896  f4
## 192 48.02012  f4
## 193 36.68252  f4
## 194 52.30373  f4
## 195 31.91917  f4
## 196 33.52016  f4
## 197 58.41158  f4
## 198 33.84153  f4
## 199 48.11975  f4
## 200 40.18924  f4

Analyze the variance due to the factors and random error

anova1 <- aov(values ~ ind, data = stack.v) # Conduct one-way ANOVA
anova1
## Call:
##    aov(formula = values ~ ind, data = stack.v)
## 
## Terms:
##                       ind Residuals
## Sum of Squares    466.436 12801.533
## Deg. of Freedom         3       196
## 
## Residual standard error: 8.081704
## Estimated effects may be unbalanced
summary(anova1)
##              Df Sum Sq Mean Sq F value Pr(>F)  
## ind           3    466  155.48    2.38 0.0709 .
## Residuals   196  12802   65.31                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Post-hoc comparisons using Tukey’s HSD (higest significant difference)(controls for experimentwise error)

TukeyHSD(anova1)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = values ~ ind, data = stack.v)
## 
## $ind
##             diff        lwr      upr     p adj
## f2-f1  2.2354737 -1.9528008 6.423748 0.5114759
## f3-f1  0.9244931 -3.2637814 5.112768 0.9403319
## f4-f1  4.0655549 -0.1227196 8.253829 0.0606555
## f3-f2 -1.3109806 -5.4992551 2.877294 0.8491824
## f4-f2  1.8300812 -2.3581932 6.018356 0.6700704
## f4-f3  3.1410618 -1.0472126 7.329336 0.2134873

Save the data frame as a text file (tab separated) for analysis in SAS

write.table(stack.v, "stack_v.txt", sep="\t")

Import the saved data file and create a SAS dataset

%let path=/folders/myshortcuts/Programming-R/MARKDOWNS;
libname stat "&path";

proc import datafile="&path/stack_v.txt" dbms=tab out=stat.stack_v replace; 
run;

data stat.stack_v_new;
    set stat.stack_v (rename=(VAR3=groups));
run;

Check the creation of SAS dataset in R

library(haven)
data <- read_sas("stack_v_new.sas7bdat")
data
## # A tibble: 200 x 3
##    values   ind groups
##    <chr>  <dbl> <chr> 
##  1 1       35.6 f1    
##  2 2       41.3 f1    
##  3 3       34.2 f1    
##  4 4       51.2 f1    
##  5 5       42.3 f1    
##  6 6       34.3 f1    
##  7 7       43.4 f1    
##  8 8       45.2 f1    
##  9 9       44.0 f1    
## 10 10      37.9 f1    
## # ... with 190 more rows

Analyze the SAS dataset using proc ANOVA

proc anova data=stat.stack_v_new;
    class groups;
    model ind=groups;
run;

image.

Analyze the SAS dataset using proc GLM (Generalized Linear Model)

proc glm data=stat.stack_v_new;
    class groups;
    model ind=groups / solution;
    means groups;
run;

image.

Analysis of the same dataset using JMP

image.

Analysis of the residuals in JMP

image.