################################################################################
#### R Coding Project -- Starter Code                                       ####
################################################################################
library(car)
## Loading required package: carData
## The read.csv command below imports the `digital` data frame into R.  
## You should see digital in the Global Environment pane with 298 observations
## across 31 variables to start. Select and run the first 55 lines one time.

## Import the 2022 digital literacy CSV from the course URL into `digital`.
digital <- read.csv(
  file = "https://www.crc.nd.edu/~jakosa/ITAO_20200/Data/2022_Digital_Literacy.csv"
)

##########################################
#### Data Management                  ####
##########################################

## Short aliases for two long column names (the original columns are kept)
digital$Forms          <- digital$Filling_out_forms_on_the_Internet
digital$SearchInternet <- digital$Searching_for_information_on_the_Internet

## Grouping variables become factors.
## Gender and Age keep R's default level order.
digital$Gender <- as.factor(digital$Gender)
digital$Age    <- as.factor(digital$Age)

## Education gets an explicit level order; values not listed here become NA.
digital$Education <- factor(
  digital$Highest_level_of_education,
  levels = c(
    "No formal schooling",
    "Some high school",
    "High school diploma or G.E.D.",
    "Some college",
    "Associate's degree",
    "Bachelor's degree",
    "Master's degree",
    "Professional or Doctorate degree"
  )
)

## Income gets an explicit level order as well.
## NOTE: the trailing space in "$60,000 to $99,999 " is deliberate -- the group
## label printed by describeBy() further down carries the same trailing space,
## so the level has to include it to match the values in the data.
digital$Income <- factor(
  digital$Annual_household_income,
  levels = c(
    "Under $15,000",
    "$15,000 to $29,999",
    "$30,000 to $59,999",
    "$60,000 to $99,999 ",
    "$100,000 to $124,999",
    "$125,000 or more"
  )
)

#===============================================================================
# Necessary packages for the file                                              #
#===============================================================================



##============================================================================##
## Refer to the R Coding Project Instructional Guide for the specifics        ##
## of each of the following analyses. Failure to read and follow the guide    ##
## might lead to incorrectly run analyses.                                    ##
##============================================================================##


#===============================================================================
# 1. Descriptive statistics for certain variables                             ##
# Write your code for these analyses in the blank spaces below.               ##
#===============================================================================
library(psych)
## 
## Attaching package: 'psych'
## The following object is masked from 'package:car':
## 
##     logit
# Descriptive statistics (psych `describe`) for digital$SearchInternet
describe(x = digital$SearchInternet)
##    vars   n mean   sd median trimmed mad min max range skew kurtosis   se
## X1    1 298 4.14 1.43      5    4.42   0   1   5     4 -1.4     0.34 0.08
# Descriptive statistics (psych `describe`) for
# digital$Using_safe_online_practices_privacy_and_security
describe(x = digital$Using_safe_online_practices_privacy_and_security)
##    vars   n mean   sd median trimmed  mad min max range  skew kurtosis   se
## X1    1 298 3.91 1.34      4    4.13 1.48   1   5     4 -1.08    -0.11 0.08
# Descriptive statistics (psych `describeBy`) for digital$Forms,
# reported separately for each level of digital$Gender
describeBy(x = digital$Forms, group = digital$Gender)
## 
##  Descriptive statistics by group 
## group: Man
##    vars  n mean   sd median trimmed mad min max range  skew kurtosis   se
## X1    1 97 4.14 1.33      5    4.41   0   1   5     4 -1.36     0.46 0.14
## ------------------------------------------------------------ 
## group: Woman
##    vars   n mean   sd median trimmed mad min max range  skew kurtosis  se
## X1    1 201 3.93 1.46      5    4.16   0   1   5     4 -1.06    -0.44 0.1
# Descriptive statistics (psych `describeBy`) for digital$Online_shopping,
# reported separately for each level of digital$Income
describeBy(x = digital$Online_shopping, group = digital$Income)
## 
##  Descriptive statistics by group 
## group: Under $15,000
##    vars  n mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 67 3.66 1.48      4     3.8 1.48   1   5     4 -0.7    -1.01 0.18
## ------------------------------------------------------------ 
## group: $15,000 to $29,999
##    vars  n mean   sd median trimmed mad min max range  skew kurtosis   se
## X1    1 49  4.1 1.36      5    4.32   0   1   5     4 -1.31     0.27 0.19
## ------------------------------------------------------------ 
## group: $30,000 to $59,999
##    vars   n mean   sd median trimmed mad min max range  skew kurtosis   se
## X1    1 110 4.15 1.34      5    4.44   0   1   5     4 -1.47     0.77 0.13
## ------------------------------------------------------------ 
## group: $60,000 to $99,999 
##    vars  n mean   sd median trimmed mad min max range  skew kurtosis   se
## X1    1 45 4.33 1.26      5    4.59   0   1   5     4 -1.62     1.17 0.19
## ------------------------------------------------------------ 
## group: $100,000 to $124,999
##    vars n mean   sd median trimmed mad min max range  skew kurtosis   se
## X1    1 9 4.11 1.54      5    4.11   0   1   5     4 -1.08    -0.66 0.51
## ------------------------------------------------------------ 
## group: $125,000 or more
##    vars  n mean   sd median trimmed mad min max range  skew kurtosis   se
## X1    1 18 4.17 1.54      5    4.31   0   1   5     4 -1.35     0.03 0.36
# Descriptive statistics (psych `describeBy`) for
# digital$Sending_and_receiving_emails, reported separately for each level
# of digital$Education
describeBy(
  x = digital$Sending_and_receiving_emails,
  group = digital$Education
)
## 
##  Descriptive statistics by group 
## group: No formal schooling
##    vars n mean  sd median trimmed mad min max range skew kurtosis   se
## X1    1 4 1.75 1.5      1    1.75   0   1   4     3 0.75    -1.69 0.75
## ------------------------------------------------------------ 
## group: Some high school
##    vars  n mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 14    3 1.84      3       3 2.97   1   5     4    0    -1.98 0.49
## ------------------------------------------------------------ 
## group: High school diploma or G.E.D.
##    vars  n mean   sd median trimmed mad min max range  skew kurtosis   se
## X1    1 51 4.16 1.33      5    4.44   0   1   5     4 -1.38     0.52 0.19
## ------------------------------------------------------------ 
## group: Some college
##    vars  n mean   sd median trimmed mad min max range  skew kurtosis   se
## X1    1 63 4.25 1.34      5    4.55   0   1   5     4 -1.72     1.46 0.17
## ------------------------------------------------------------ 
## group: Associate's degree
##    vars  n mean   sd median trimmed mad min max range  skew kurtosis   se
## X1    1 26 4.15 1.49      5    4.36   0   1   5     4 -1.37      0.2 0.29
## ------------------------------------------------------------ 
## group: Bachelor's degree
##    vars  n mean   sd median trimmed mad min max range skew kurtosis  se
## X1    1 79 4.67 0.93      5    4.92   0   1   5     4   -3     8.11 0.1
## ------------------------------------------------------------ 
## group: Master's degree
##    vars  n mean   sd median trimmed mad min max range skew kurtosis  se
## X1    1 50 4.36 1.44      5     4.7   0   1   5     4 -1.8     1.33 0.2
## ------------------------------------------------------------ 
## group: Professional or Doctorate degree
##    vars  n mean   sd median trimmed mad min max range skew kurtosis   se
## X1    1 11 4.73 0.65      5    4.89   0   3   5     2 -1.8      1.8 0.19
# Descriptive statistics (psych `describeBy`) for digital$Total_Percent_Score,
# reported separately for each level of digital$Age
describeBy(x = digital$Total_Percent_Score, group = digital$Age)
## 
##  Descriptive statistics by group 
## group: 18-29
##    vars  n mean   sd median trimmed  mad min max range  skew kurtosis   se
## X1    1 50 0.87 0.22   0.96    0.93 0.05 0.2   1   0.8 -2.13     3.36 0.03
## ------------------------------------------------------------ 
## group: 30-39
##    vars  n mean  sd median trimmed  mad min max range  skew kurtosis   se
## X1    1 65 0.85 0.2   0.94    0.89 0.09 0.2   1   0.8 -1.77     2.73 0.02
## ------------------------------------------------------------ 
## group: 40-49
##    vars  n mean  sd median trimmed  mad  min max range  skew kurtosis   se
## X1    1 45 0.82 0.2   0.88    0.86 0.16 0.35   1  0.65 -1.21     0.32 0.03
## ------------------------------------------------------------ 
## group: 50-59
##    vars  n mean   sd median trimmed  mad min max range  skew kurtosis   se
## X1    1 51 0.69 0.27   0.74    0.71 0.31 0.2   1   0.8 -0.48    -1.08 0.04
## ------------------------------------------------------------ 
## group: 60-69
##    vars  n mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 45 0.71 0.24   0.75    0.73 0.23 0.2   1   0.8 -0.6    -0.73 0.04
## ------------------------------------------------------------ 
## group: 70-79
##    vars  n mean   sd median trimmed  mad min max range  skew kurtosis   se
## X1    1 35 0.72 0.25   0.79    0.74 0.22 0.2   1   0.8 -0.75    -0.68 0.04
## ------------------------------------------------------------ 
## group: 80+
##    vars n mean   sd median trimmed mad min  max range skew kurtosis   se
## X1    1 7 0.53 0.35    0.4    0.53 0.3 0.2 0.98  0.78 0.19     -2.1 0.13
# One-sample, upper-tailed t test of SearchInternet against mu = 4.
# NOTE(review): the same test is run again under section 2 below.
t.test(x = digital$SearchInternet, alternative = "greater", mu = 4)
## 
##  One Sample t-test
## 
## data:  digital$SearchInternet
## t = 1.7427, df = 297, p-value = 0.04121
## alternative hypothesis: true mean is greater than 4
## 95 percent confidence interval:
##  4.007674      Inf
## sample estimates:
## mean of x 
##  4.144295
# Sample standard deviation of SearchInternet, ignoring missing values
with(digital, sd(SearchInternet, na.rm = TRUE))
## [1] 1.429364
#===============================================================================
# 2. Single Sample t-Test for digital$SearchInternet according to the         ##
# details in the instructional guide.                                         ##
# Write your code for this analysis in the blank space below.                 ##
#===============================================================================
# One-sample t test: is the mean of SearchInternet greater than 4?
t.test(
  x = digital$SearchInternet,
  mu = 4,
  alternative = "greater",
  conf.level = 0.95
)
## 
##  One Sample t-test
## 
## data:  digital$SearchInternet
## t = 1.7427, df = 297, p-value = 0.04121
## alternative hypothesis: true mean is greater than 4
## 95 percent confidence interval:
##  4.007674      Inf
## sample estimates:
## mean of x 
##  4.144295
#===============================================================================
# 3. Single Sample t-Test for                                                 ##
# digital$Using_safe_online_practices_privacy_and_security according to the   ##
# details in the instructional guide.                                         ##
# Write your code for this analysis in the blank space below.                 ##
#===============================================================================
# One-sample t test: is the mean of the safe-online-practices item less than 3?
# NOTE(review): mu and the direction of the alternative are taken to come from
# the instructional guide -- confirm against it.
t.test(
  x = digital$Using_safe_online_practices_privacy_and_security,
  mu = 3,
  alternative = "less",
  conf.level = 0.95
)
## 
##  One Sample t-test
## 
## data:  digital$Using_safe_online_practices_privacy_and_security
## t = 11.732, df = 297, p-value = 1
## alternative hypothesis: true mean is less than 3
## 95 percent confidence interval:
##      -Inf 4.041124
## sample estimates:
## mean of x 
##  3.912752
#===============================================================================
# 4. Independent t Test for digital$Forms by digital$Gender according to the  ##
# details in the instructional guide.                                         ##
# Perform Levene's Test for Equality of Variance first.                       ##
# Write your code for these analyses in the blank spaces below.               ##
#===============================================================================

## Levene's test (centered on the group means) for equal variance of Forms
## across the Gender groups
leveneTest(
  y = digital$Forms,
  group = digital$Gender,
  center = mean
)
## Levene's Test for Homogeneity of Variance (center = mean)
##        Df F value Pr(>F)
## group   1  2.1151 0.1469
##       296
## Independent-samples t test of Forms by Gender. Pick the variant that matches
## the Levene's test result above; var.equal = TRUE requests the pooled-variance
## (equal variances assumed) version.
t.test(
  Forms ~ Gender,
  data = digital,
  var.equal = TRUE,
  mu = 0,
  alternative = "two.sided",
  conf.level = 0.95
)
## 
##  Two Sample t-test
## 
## data:  Forms by Gender
## t = 1.2167, df = 296, p-value = 0.2247
## alternative hypothesis: true difference in means between group Man and group Woman is not equal to 0
## 95 percent confidence interval:
##  -0.1321337  0.5600969
## sample estimates:
##   mean in group Man mean in group Woman 
##            4.144330            3.930348
#===============================================================================
# 5. One-Way ANOVA for digital$Online_shopping by digital$Income              ##
# You do not need to factor your independent variable; done in lines 1-55.    ##
# Write your code for these analyses in the blank spaces below.               ##
#===============================================================================

## Fit the one-way ANOVA (Online_shopping by Income) and print the omnibus
## F table
anova_result <- aov(formula = digital$Online_shopping ~ digital$Income)
summary(object = anova_result)
##                 Df Sum Sq Mean Sq F value Pr(>F)
## digital$Income   5   15.6   3.111    1.63  0.152
## Residuals      292  557.4   1.909
## Tukey's HSD post-hoc comparisons on the model fitted above
## (always run, whatever the omnibus F shows).
TukeyHSD(x = anova_result, conf.level = 0.95)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = digital$Online_shopping ~ digital$Income)
## 
## $`digital$Income`
##                                                  diff         lwr       upr
## $15,000 to $29,999-Under $15,000          0.445324398 -0.29968269 1.1903315
## $30,000 to $59,999-Under $15,000          0.497829037 -0.11638501 1.1120431
## $60,000 to $99,999 -Under $15,000         0.676616915 -0.08727551 1.4405093
## $100,000 to $124,999-Under $15,000        0.454394693 -0.95267300 1.8614624
## $125,000 or more-Under $15,000            0.509950249 -0.54226032 1.5621608
## $30,000 to $59,999-$15,000 to $29,999     0.052504638 -0.62821931 0.7332286
## $60,000 to $99,999 -$15,000 to $29,999    0.231292517 -0.58703373 1.0496188
## $100,000 to $124,999-$15,000 to $29,999   0.009070295 -1.42827647 1.4464171
## $125,000 or more-$15,000 to $29,999       0.064625850 -1.02774464 1.1569963
## $60,000 to $99,999 -$30,000 to $59,999    0.178787879 -0.52255453 0.8801303
## $100,000 to $124,999-$30,000 to $59,999  -0.043434343 -1.41754825 1.3306796
## $125,000 or more-$30,000 to $59,999       0.012121212 -0.99559731 1.0198397
## $100,000 to $124,999-$60,000 to $99,999  -0.222222222 -1.66944777 1.2250033
## $125,000 or more-$60,000 to $99,999      -0.166666667 -1.27200344 0.9386701
## $125,000 or more-$100,000 to $124,999     0.055555556 -1.56249180 1.6736029
##                                              p adj
## $15,000 to $29,999-Under $15,000         0.5231016
## $30,000 to $59,999-Under $15,000         0.1873155
## $60,000 to $99,999 -Under $15,000        0.1157893
## $100,000 to $124,999-Under $15,000       0.9394796
## $125,000 or more-Under $15,000           0.7328779
## $30,000 to $59,999-$15,000 to $29,999    0.9999268
## $60,000 to $99,999 -$15,000 to $29,999   0.9654061
## $100,000 to $124,999-$15,000 to $29,999  1.0000000
## $125,000 or more-$15,000 to $29,999      0.9999804
## $60,000 to $99,999 -$30,000 to $59,999   0.9779391
## $100,000 to $124,999-$30,000 to $59,999  0.9999991
## $125,000 or more-$30,000 to $59,999      1.0000000
## $100,000 to $124,999-$60,000 to $99,999  0.9978904
## $125,000 or more-$60,000 to $99,999      0.9980663
## $125,000 or more-$100,000 to $124,999    0.9999987
#===============================================================================
# 6. One-Way ANOVA and post-hoc tests for digital$Sending_and_receiving_emails 
# by digital$Education                                                        
# You do not need to factor your independent variable; done in lines 1-55.
# Write your code for these analyses in the blank spaces below.
#===============================================================================

## Fit the one-way ANOVA (Sending_and_receiving_emails by Education) and print
## the omnibus F table
anova_result <- aov(
  formula = digital$Sending_and_receiving_emails ~ digital$Education
)
summary(object = anova_result)
##                    Df Sum Sq Mean Sq F value   Pr(>F)    
## digital$Education   7   64.4   9.194   5.555 5.11e-06 ***
## Residuals         290  480.0   1.655                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Post-hoc pairwise t tests with Bonferroni-adjusted p-values
## (always run, whatever the omnibus F shows).
pairwise.t.test(
  x = digital$Sending_and_receiving_emails,
  g = digital$Education,
  alternative = "two.sided",
  p.adjust.method = "bonferroni"
)
## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  digital$Sending_and_receiving_emails and digital$Education 
## 
##                                  No formal schooling Some high school
## Some high school                 1.00000             -               
## High school diploma or G.E.D.    0.01036             0.08744         
## Some college                     0.00544             0.03057         
## Associate's degree               0.01625             0.20220         
## Bachelor's degree                0.00037             0.00030         
## Master's degree                  0.00329             0.01528         
## Professional or Doctorate degree 0.00260             0.02724         
##                                  High school diploma or G.E.D. Some college
## Some high school                 -                             -           
## High school diploma or G.E.D.    -                             -           
## Some college                     1.00000                       -           
## Associate's degree               1.00000                       1.00000     
## Bachelor's degree                0.75302                       1.00000     
## Master's degree                  1.00000                       1.00000     
## Professional or Doctorate degree 1.00000                       1.00000     
##                                  Associate's degree Bachelor's degree
## Some high school                 -                  -                
## High school diploma or G.E.D.    -                  -                
## Some college                     -                  -                
## Associate's degree               -                  -                
## Bachelor's degree                1.00000            -                
## Master's degree                  1.00000            1.00000          
## Professional or Doctorate degree 1.00000            1.00000          
##                                  Master's degree
## Some high school                 -              
## High school diploma or G.E.D.    -              
## Some college                     -              
## Associate's degree               -              
## Bachelor's degree                -              
## Master's degree                  -              
## Professional or Doctorate degree 1.00000        
## 
## P value adjustment method: bonferroni
library(psych)
# Descriptives of Total_Percent_Score per Age group, using describeBy's
# `fast = TRUE` option
describeBy(
  x = digital$Total_Percent_Score,
  group = digital$Age,
  fast = TRUE
)
## 
##  Descriptive statistics by group 
## group: 18-29
##    vars  n mean   sd median min max range  skew kurtosis   se
## X1    1 50 0.87 0.22   0.96 0.2   1   0.8 -2.13     3.36 0.03
## ------------------------------------------------------------ 
## group: 30-39
##    vars  n mean  sd median min max range  skew kurtosis   se
## X1    1 65 0.85 0.2   0.94 0.2   1   0.8 -1.77     2.73 0.02
## ------------------------------------------------------------ 
## group: 40-49
##    vars  n mean  sd median  min max range  skew kurtosis   se
## X1    1 45 0.82 0.2   0.88 0.35   1  0.65 -1.21     0.32 0.03
## ------------------------------------------------------------ 
## group: 50-59
##    vars  n mean   sd median min max range  skew kurtosis   se
## X1    1 51 0.69 0.27   0.74 0.2   1   0.8 -0.48    -1.08 0.04
## ------------------------------------------------------------ 
## group: 60-69
##    vars  n mean   sd median min max range skew kurtosis   se
## X1    1 45 0.71 0.24   0.75 0.2   1   0.8 -0.6    -0.73 0.04
## ------------------------------------------------------------ 
## group: 70-79
##    vars  n mean   sd median min max range  skew kurtosis   se
## X1    1 35 0.72 0.25   0.79 0.2   1   0.8 -0.75    -0.68 0.04
## ------------------------------------------------------------ 
## group: 80+
##    vars n mean   sd median min  max range skew kurtosis   se
## X1    1 7 0.53 0.35    0.4 0.2 0.98  0.78 0.19     -2.1 0.13
#===============================================================================
# 7. One-Way ANOVA for digital$Total_Percent_Score by digital$Age             ##
# You do not need to factor your independent variable; done in lines 1-55.    ##
# Write your code for these analyses in the blank spaces below.               ##
#===============================================================================

## One-way ANOVA
## The grouping factor is Age, as the section header specifies. The earlier
## version of this section modelled digital$Gender by mistake.
anova_result <- aov(digital$Total_Percent_Score ~ digital$Age)
summary(anova_result)
## Post-hoc analyses using the unadjusted Fisher's LSD method 
## (run the analysis regardless of the outcome of the omnibus F).
## Fisher's LSD is run here as pairwise t tests with a pooled SD and no
## p-value adjustment (p.adjust.method = "none"). TukeyHSD(), used here
## before, applies a family-wise correction and is therefore not "unadjusted".
pairwise.t.test(digital$Total_Percent_Score,
                digital$Age,
                p.adjust.method = "none",
                alternative = "two.sided")
## NOTE: the console output previously pasted here came from the Gender model
## and Tukey's HSD, so it has been removed. Re-run this section and recompile
## the report to obtain the Age / Fisher's LSD results.
##----------------------------------------------------------------------------##
##  Compile your report. Follow the instructions at the end of the            ##
##  instructional guide. Your compiled report, not this code file, must be    ##
##  uploaded to Canvas as part of this assignment. If your compiled report    ##
##  is not uploaded, it will reduce your overall score on the coding project. ##
##----------------------------------------------------------------------------##