Set up Rstudio

# Global knitr chunk defaults: show the code, suppress warnings and
# messages, print output without a comment prefix, and fix the figure size.
knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  message = FALSE,
  comment = NA,
  fig.width = 6,
  fig.height = 4
)

Data Importation

# Read the German credit CSV from the local Downloads folder.
german.marketing.data <- read.csv(
  file = "C:\\Users\\user\\Downloads\\german_credit__data.csv"
)

Load the following library

library(plyr)
# Number of observations before cleaning.
# nrow() is used instead of count(): at this point only plyr is attached, and
# plyr::count() tabulates the frequency of each unique row rather than
# returning a single observation count.
nrow(german.marketing.data)

Delete the missing observations

# Keep only the rows that have no missing value in any column.
german.marketing.data <- na.omit(object = german.marketing.data)

The available observations after deleting the missing observations

# Number of observations left after dropping incomplete rows.
# nrow() is used instead of count(): with only plyr attached here,
# plyr::count() would list every unique row with its frequency instead of
# returning the observation count.
nrow(german.marketing.data)

Data preparation and cleaning

The Structure of the Dataset

# Compact overview of every column: name, type and first values.
str(object = german.marketing.data)
'data.frame':   522 obs. of  10 variables:
 $ X               : int  1 3 4 7 9 10 11 12 13 14 ...
 $ Age             : int  22 45 53 35 28 25 24 22 60 28 ...
 $ Sex             : chr  "female" "male" "male" "male" ...
 $ Job             : int  2 2 2 3 3 2 2 2 1 2 ...
 $ Housing         : chr  "own" "free" "free" "rent" ...
 $ Saving.accounts : chr  "little" "little" "little" "little" ...
 $ Checking.account: chr  "moderate" "little" "little" "moderate" ...
 $ Credit.amount   : int  5951 7882 4870 6948 5234 1295 4308 1567 1199 1403 ...
 $ Duration        : int  48 42 24 36 30 12 48 12 24 15 ...
 $ Purpose         : chr  "radio/TV" "furniture/equipment" "car" "car" ...
 - attr(*, "na.action")= 'omit' Named int [1:478] 1 3 6 7 9 17 18 20 21 25 ...
  ..- attr(*, "names")= chr [1:478] "1" "3" "6" "7" ...
# Puts the data frame's columns on the search path so they can be referenced
# by bare name.
# NOTE(review): attach() takes a snapshot, so later changes to
# german.marketing.data (e.g. the Job recoding) are not reflected in the
# attached copy. The visible code always passes the data frame explicitly,
# so this call looks unnecessary - confirm before removing.
attach(german.marketing.data)

View the first few observations

# Show the first five rows of the cleaned data.
head(x = german.marketing.data, n = 5)

Summary Statistics

# Per-column summary: quantiles for numeric columns, length/class for text.
summary(object = german.marketing.data)
       X              Age            Sex                 Job       
 Min.   :  1.0   Min.   :19.00   Length:522         Min.   :0.000  
 1st Qu.:233.5   1st Qu.:26.00   Class :character   1st Qu.:2.000  
 Median :502.5   Median :31.50   Mode  :character   Median :2.000  
 Mean   :493.5   Mean   :34.89                      Mean   :1.875  
 3rd Qu.:736.8   3rd Qu.:41.00                      3rd Qu.:2.000  
 Max.   :999.0   Max.   :75.00                      Max.   :3.000  
   Housing          Saving.accounts    Checking.account   Credit.amount  
 Length:522         Length:522         Length:522         Min.   :  276  
 Class :character   Class :character   Class :character   1st Qu.: 1298  
 Mode  :character   Mode  :character   Mode  :character   Median : 2326  
                                                          Mean   : 3279  
                                                          3rd Qu.: 3971  
                                                          Max.   :18424  
    Duration       Purpose         
 Min.   : 6.00   Length:522        
 1st Qu.:12.00   Class :character  
 Median :18.00   Mode  :character  
 Mean   :21.34                     
 3rd Qu.:26.75                     
 Max.   :72.00                     

Converting string variables to factors

# Install dplyr only when it is missing, then attach it.
# requireNamespace() checks availability without attaching anything, so
# library() is the single place where the package is loaded and it still
# fails loudly if the installation did not succeed.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)
library(gtsummary)
library(ggplot2)

# Copy of the credit data with the five text columns converted to factors.
# The result is stored under a new name; german.marketing.data itself is
# left unchanged by this step.
german_crdt_data2 <- german.marketing.data %>%
  mutate(
    across(
      c(Sex, Housing, Saving.accounts, Checking.account, Purpose),
      factor
    )
  )

##factoring job levels

# Turn the integer Job codes 0-3 into a labelled factor, in code order.
german.marketing.data[["Job"]] <- factor(
  x = german.marketing.data[["Job"]],
  levels = 0:3,
  labels = c(
    "unskilled and non-resident",
    "unskilled and resident",
    "skilled",
    "highly skilled"
  )
)

Summary statistics

# Credit amount and duration compared between the sexes, with a p-value.
# Columns are picked by name (positions 3, 8 and 9 of the data frame).
german.marketing.data[, c("Sex", "Credit.amount", "Duration")] %>%
  tbl_summary(by = Sex) %>%
  add_p()
Characteristic female, N = 1681 male, N = 3541 p-value2
Credit.amount 1,959 (1,214, 3,624) 2,452 (1,349, 4,285) 0.014
Duration 18 (12, 24) 18 (12, 30) 0.12
1 Median (IQR)
2 Wilcoxon rank sum test
# Install stargazer only when it is missing, then attach it.
# requireNamespace() tests availability without attaching the package, so a
# failed installation surfaces as an error from library() instead of being
# silently ignored.
if (!requireNamespace("stargazer", quietly = TRUE)) {
  install.packages("stargazer")
}
library(stargazer)

# Text table of descriptive statistics for every column except the first
# (the row identifier X).
stargazer(german.marketing.data[, -1], type = "text")

================================================
Statistic      N    Mean    St. Dev.  Min  Max  
------------------------------------------------
Age           522  34.889    11.788   19    75  
Credit.amount 522 3,278.745 2,929.155 276 18,424
Duration      522  21.339    12.474    6    72  
------------------------------------------------
# Install gtsummary only when it is missing, then attach it.
# requireNamespace() checks availability without attaching, leaving
# library() as the one call that loads the package and errors on failure.
if (!requireNamespace("gtsummary", quietly = TRUE)) {
  install.packages("gtsummary")
}
library(gtsummary)

# Min / median / mean / SD / max for the three numeric variables.
# The digits vector follows the order of the statistics in the template:
# whole numbers for min, median and max, two decimals for mean and SD.
german.marketing.data[, c("Age", "Credit.amount", "Duration")] %>%
  tbl_summary(
    statistic = list(all_continuous() ~ "{min} {median} {mean} {sd} {max}"),
    digits = list(c(Credit.amount, Age, Duration) ~ c(0, 0, 2, 2, 0)),
    label = list(Credit.amount ~ "Credit amount")
  )
Characteristic N = 5221
Age 19 32 34.89 11.79 75
Credit amount 276 2,326 3,278.75 2,929.16 18,424
Duration 6 18 21.34 12.47 72
1 Minimum Median Mean SD Maximum
# Frequency table of the remaining variables: drop columns 1, 2, 8 and 9
# (X, Age, Credit.amount, Duration) and summarise what is left.
german.marketing.data[, -c(1, 2, 8, 9)] %>%
  tbl_summary()
Characteristic N = 5221
Sex
    female 168 (32%)
    male 354 (68%)
Job
    unskilled and non-resident 14 (2.7%)
    unskilled and resident 116 (22%)
    skilled 313 (60%)
    highly skilled 79 (15%)
Housing
    free 65 (12%)
    own 349 (67%)
    rent 108 (21%)
Saving.accounts
    little 412 (79%)
    moderate 64 (12%)
    quite rich 23 (4.4%)
    rich 23 (4.4%)
Checking.account
    little 245 (47%)
    moderate 224 (43%)
    rich 53 (10%)
Purpose
    business 53 (10%)
    car 173 (33%)
    domestic appliances 6 (1.1%)
    education 28 (5.4%)
    furniture/equipment 107 (20%)
    radio/TV 132 (25%)
    repairs 14 (2.7%)
    vacation/others 9 (1.7%)
1 n (%)
# Counts per sex as reported by the summary table (female, male).
n <- c(168, 354)

# Slice labels of the form "<count> = <share>%", share rounded to 2 decimals.
perc <- sprintf("%s = %s%%", n, round(100 * n / sum(n), 2))
pie(x = n, labels = perc)

# Scatter plot of credit amount against age with a straight-line fit
# (no confidence band).
ggplot(
  data = german.marketing.data,
  mapping = aes(x = Age, y = Credit.amount)
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)

##Pie chart of sex

# Pie chart of the sex distribution with a colour legend.
# n holds the counts in the order female (168), male (354), so the first
# colour (blue) belongs to "Female" and the second (black) to "Male"; the
# legend labels must follow that same order.
pie(n, labels = perc, main = "Sex pie chart", col = c("blue", "black"))
legend(
  "topleft",
  legend = c("Female", "Male"),
  cex = 0.8,
  fill = c("blue", "black")
)

# Credit amount and duration compared between the sexes, with a p-value.
# Columns are picked by name (positions 3, 8 and 9 of the data frame).
german.marketing.data[, c("Sex", "Credit.amount", "Duration")] %>%
  tbl_summary(by = Sex) %>%
  add_p()
Characteristic female, N = 1681 male, N = 3541 p-value2
Credit.amount 1,959 (1,214, 3,624) 2,452 (1,349, 4,285) 0.014
Duration 18 (12, 24) 18 (12, 30) 0.12
1 Median (IQR)
2 Wilcoxon rank sum test

gender and purpose

# Cross-tabulation of loan purpose by sex (columns 3 and 10), with a
# p-value, an overall column and bold variable labels.
purpose_by_sex <- tbl_summary(
  german.marketing.data[, c("Sex", "Purpose")],
  by = Sex
)
bold_labels(add_overall(add_p(purpose_by_sex)))
Characteristic Overall, N = 5221 female, N = 1681 male, N = 3541 p-value
Purpose
    business 53 (10%) 12 (7.1%) 41 (12%)
    car 173 (33%) 46 (27%) 127 (36%)
    domestic appliances 6 (1.1%) 2 (1.2%) 4 (1.1%)
    education 28 (5.4%) 12 (7.1%) 16 (4.5%)
    furniture/equipment 107 (20%) 44 (26%) 63 (18%)
    radio/TV 132 (25%) 46 (27%) 86 (24%)
    repairs 14 (2.7%) 3 (1.8%) 11 (3.1%)
    vacation/others 9 (1.7%) 3 (1.8%) 6 (1.7%)
1 n (%)
library(ggplot2)
# Stacked bar chart of loan purpose, coloured by sex, with the total count
# printed above each bar.
# The original call passed `size` twice (3 and 9); only one text size can
# apply, so the duplicate is dropped and the first value is kept.
# NOTE(review): switch to size = 9 if the larger labels were intended.
ggplot(german.marketing.data, aes(x = Purpose)) +
  geom_bar(
    aes(fill = Sex),
    position = position_stack(reverse = FALSE)
  ) +
  geom_text(
    aes(label = after_stat(count)),
    stat = "count",
    color = "green",
    size = 3,
    nudge_y = 8,
    nudge_x = 0
  ) +
  theme_minimal()

# Horizontal stacked bars of vehicle class split by drive type, legend on
# top. This plot uses the `mpg` data set, not the credit data.
ggplot(data = mpg, mapping = aes(y = class)) +
  geom_bar(
    mapping = aes(fill = drv),
    position = position_stack(reverse = TRUE)
  ) +
  theme(legend.position = "top")

age and purpose

# Mean, median and SD of age within each loan purpose (columns 2 and 10),
# with a p-value, an overall column and bold variable labels.
age_by_purpose <- tbl_summary(
  german.marketing.data[, c("Age", "Purpose")],
  by = Purpose,
  digits = list(Age ~ 2),
  statistic = list(all_continuous() ~ "{mean} {median} {sd}")
)
bold_labels(add_overall(add_p(age_by_purpose)))
Characteristic Overall, N = 5221 business, N = 531 car, N = 1731 domestic appliances, N = 61 education, N = 281 furniture/equipment, N = 1071 radio/TV, N = 1321 repairs, N = 141 vacation/others, N = 91 p-value2
Age 34.89 31.50 11.79 35.19 33.00 11.23 36.86 34.00 12.41 35.50 32.00 12.82 35.57 32.50 11.35 32.41 30.00 9.71 32.95 29.00 11.66 40.86 36.00 15.92 41.33 39.00 12.38 0.003
1 Mean Median SD
2 Kruskal-Wallis rank sum test

Age purpose

# Box plot of age for each loan purpose; boxes are coloured by purpose, the
# redundant legend is hidden and the x labels are rotated to stay readable.
ggplot(
  data = german.marketing.data,
  mapping = aes(x = Purpose, y = Age)
) +
  geom_boxplot(mapping = aes(fill = Purpose)) +
  labs(title = "", x = "Purpose", y = "Age") +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 90, vjust = 0.5)
  )

# Mean age per loan purpose, one row per purpose.
group_mean_age <- aggregate(
  Age ~ Purpose,
  data = german.marketing.data,
  FUN = mean
)
print(group_mean_age)

To show (e.g.) means, you need geom_col()

# Mean age per loan purpose, entered by hand from the aggregate() result.
# The first label is spelled "Vacation/others" to match the data's
# "vacation/others" category (it previously read "Vocation/others").
df <- data.frame(
  Purpose = c(
    "Vacation/others",
    "Repairs",
    "Education",
    "Car",
    "Business",
    "Radio/TV",
    "Domestic appliances",
    "Furniture/equipment"
  ),
  mean = c(
    41.33333,
    40.85714,
    35.57143,
    36.86127,
    35.18868,
    32.94697,
    35.50000,
    32.41121
  )
)

# Column chart of mean age per purpose, ordered from lowest to highest mean.
# The fill colour is set as a fixed parameter outside aes(); inside aes()
# the string "red" is treated as a data value and the bars get the default
# palette colour instead of red.
ggplot(df, aes(x = reorder(Purpose, mean), y = mean)) +
  geom_col(fill = "red") +
  labs(title = "", y = "Mean Age", x = "Purpose") +
  theme_minimal() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 90, vjust = 0.5)
  )

# Bar chart of mean age per loan purpose, computed directly from the data
# by the "summary" stat; x labels rotated for readability.
ggplot(
  data = german.marketing.data,
  mapping = aes(x = Purpose, y = Age)
) +
  geom_bar(stat = "summary", fun = "mean", position = "dodge") +
  theme_minimal() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 90, vjust = 0.5)
  )

Age purpose sex

# Mean age per loan purpose with one bar per sex placed side by side.
ggplot(
  data = german.marketing.data,
  mapping = aes(x = Purpose, y = Age, fill = Sex)
) +
  geom_bar(stat = "summary", fun = "mean", position = "dodge") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5))

Regression Model

Load the following libraries

library(kableExtra)
library(jtools)
library(gtsummary)
library(broom)
# Stores the string "normal_print" in a variable called `render`.
# NOTE(review): this looks like a knitr chunk option that ended up in the
# code; as written it only creates a variable - confirm whether it is needed.
render <- "normal_print"

Estimate the model

# OLS regression of credit amount on age and loan duration, followed by a
# summary with confidence intervals shown to three decimals.
fit <- lm(
  formula = Credit.amount ~ Age + Duration,
  data = german.marketing.data
)
summ(fit, digits = 3, confint = TRUE)
Observations 522
Dependent variable Credit.amount
Type OLS linear regression
F(2,519) 160.883
0.383
Adj. R² 0.380
Est. 2.5% 97.5% t val. p
(Intercept) -496.539 -1202.912 209.833 -1.381 0.168
Age 20.143 3.308 36.979 2.351 0.019
Duration 143.985 128.075 159.894 17.779 0.000
Standard errors: OLS
# Publication-style coefficient table for the fitted model.
tbl_regression(x = fit)
Characteristic Beta 95% CI1 p-value
Age 20 3.3, 37 0.019
Duration 144 128, 160 <0.001
1 CI = Confidence Interval

Alternatively

# Same coefficient table, written as a pipeline.
fit |>
  tbl_regression()
Characteristic Beta 95% CI1 p-value
Age 20 3.3, 37 0.019
Duration 144 128, 160 <0.001
1 CI = Confidence Interval
# Coefficient table as a pipe-delimited (markdown) table: tidy the model
# into a data frame, then format it.
fit |>
  broom::tidy() |>
  kableExtra::kable(format = "pipe")
term estimate std.error statistic p.value
(Intercept) -496.53927 359.560314 -1.380962 0.1678849
Age 20.14346 8.569753 2.350530 0.0191200
Duration 143.98472 8.098356 17.779500 0.0000000
# Refit the same regression and show its tidy coefficient table with the
# "classic" kableExtra styling.
lm(formula = Credit.amount ~ Age + Duration, data = german.marketing.data) %>%
  tidy() %>%
  kable() %>%
  kable_classic()
term estimate std.error statistic p.value
(Intercept) -496.53927 359.560314 -1.380962 0.1678849
Age 20.14346 8.569753 2.350530 0.0191200
Duration 143.98472 8.098356 17.779500 0.0000000

Additional Concept (Regression Analysis)

Use the data on price pass through in meat to estimate a regression of price change at retail on price change at the farm gate. Report a regression table from R. Interpret the coefficient on price change at the farm gate. Interpret the t-statistics and the p-value reported by R. Is this effect of price change at the farm gate statistically significant at the 5% level?

Import the dataset

# Read the meat price pass-through data from the local Downloads folder.
retail <- read.csv(
  file = "C:\\Users\\user\\Downloads\\retail.csv"
)

View the first few observations

# Show the first five rows of the retail data.
head(x = retail, n = 5)

Count the observations

# Number of rows in the retail data. dplyr is attached after plyr in this
# script, so the unqualified count() resolves to dplyr's version; the
# namespace is spelled out to make that explicit.
dplyr::count(x = retail)

Attach the dataset

# Puts the columns of `retail` on the search path so they can be referenced
# by bare name.
# NOTE(review): the visible model code passes `data = retail` explicitly, so
# this attach() looks unnecessary and can mask other objects - confirm
# before removing.
attach(retail)

Estimate the Regression Equation

# Regress the retail price change on the farm-gate price change.
# Variables are referenced by bare column name so they are resolved through
# `data = retail`; writing `retail$...` inside the formula bypasses `data`,
# puts the prefix into the coefficient names and makes predict() ignore any
# `newdata` supplied later.
my_model <- lm(change_retail_value ~ change_farm_value, data = retail)

View the Results Using Stargazer

# Print the regression table as text and also save it to q7results.txt.
# The report code asks for variable names, coefficients with significance
# stars, standard errors, t-statistics and p-values.
stargazer(
  my_model,
  type = "text",
  report = "vc*stp",
  out = "./q7results.txt"
)

===============================================
                        Dependent variable:    
                    ---------------------------
                        change_retail_value    
-----------------------------------------------
change_farm_value            0.314***          
                              (0.044)          
                             t = 7.120         
                             p = 0.000         
                                               
Constant                     0.952***          
                              (0.336)          
                             t = 2.834         
                             p = 0.005         
                                               
-----------------------------------------------
Observations                    623            
R2                             0.075           
Adjusted R2                    0.074           
Residual Std. Error      8.369 (df = 621)      
F Statistic           50.689*** (df = 1; 621)  
===============================================
Note:               *p<0.1; **p<0.05; ***p<0.01

QUESTIONNAIRE

Import the dataset
# Read the questionnaire responses and preview the first ten rows.
quest <- read.csv(
  file = "C:\\Users\\user\\Downloads\\Domestic violence on adolescents social behavior.csv"
)
head(x = quest, n = 10)

Converting the coded questionnaire responses to labelled factors

# Convert the numerically coded questionnaire answers into labelled factors.
#
# Every item is coded 1..k, so the conversion is driven by a small helper
# and one label set per answer scale instead of one hand-written factor()
# call per column. Two label typos are corrected on the way: "Yess" -> "Yes"
# and "Neither Agree no Disagree" -> "Neither Agree nor Disagree".

# Recode `columns` of `data` as factors whose codes 1..length(labels) map to
# `labels` in order; returns the modified data frame.
label_codes <- function(data, columns, labels) {
  for (column in columns) {
    data[[column]] <- factor(
      data[[column]],
      levels = seq_along(labels),
      labels = labels
    )
  }
  data
}

# --- Demographics: each item has its own label set -------------------------
quest <- label_codes(
  quest,
  "What.is.your.age.bracket..years..",
  c("12-15", "16-19", "20-23")
)
quest <- label_codes(
  quest,
  "What.is.your.gender.",
  c("Male", "Female")
)
quest <- label_codes(
  quest,
  "What.is.your.ethnic.racial.background.",
  c("Kikuyu", "Meru", "Embu")
)
quest <- label_codes(
  quest,
  "What.is.your.current.grade.level.in.school.",
  c("Junior Secondary", "Secondary", "Senior Secondary")
)
quest <- label_codes(
  quest,
  "What.is.your.living.situation..with.parents..guardians..relatives..etc...",
  c("Parent", "Guardian", "Relative")
)

# --- Yes / No items --------------------------------------------------------
quest <- label_codes(
  quest,
  c(
    "Have.you.ever.witnessed.domestic.violence.in.your.household.",
    "Have.you.ever.been.the.victim.of.domestic.violence.",
    "Have.you.ever.witnessed.domestic.violence.outside.of.your.household..e.g...at.a.friend.s.house.or.in.public..",
    "Have.you.ever.felt.excluded.or.isolated.from.your.peer.group.",
    "Do.you.feel.comfortable.talking.to.your.parents.or.guardians.about.your.social.life.",
    "Have.you.ever.been.diagnosed.with.depression.or.other.mental.health.issues.",
    "Have.you.ever.had.thoughts.of.suicide.or.self.harm.",
    "Do.you.feel.comfortable.talking.to.your.parents.or.guardians.about.your.mental.health."
  ),
  c("Yes", "No")
)

# --- Frequency items -------------------------------------------------------
# NOTE(review): code 4 is labelled "Always" and code 5 "Often", exactly as in
# the original script; confirm against the questionnaire coding, since the
# usual scale order would put "Often" before "Always".
quest <- label_codes(
  quest,
  c(
    "How.often.does.domestic.violence.occur.in.your.household.",
    "How.often.do.you.socialize.with.your.peers.outside.of.school.",
    "How.often.do.you.feel.lonely.or.isolated.",
    "How.often.do.you.feel.sad.or.hopeless."
  ),
  c("Never", "Rarely", "Sometimes", "Always", "Often")
)

# --- Impact items ----------------------------------------------------------
quest <- label_codes(
  quest,
  c(
    "How.has.domestic.violence.affected.your.daily.life.and.relationships..Social.Behavior.",
    "How.has.domestic.violence.affected.your.social.behavior.and.relationships.",
    "How.has.domestic.violence.affected.your.mental.health.and.well.being."
  ),
  c(
    "Not at all affected",
    "Slightly affected",
    "Moderately affected",
    "Very affected",
    "Extremely affected"
  )
)

# --- Agreement (Likert) items on social support ----------------------------
quest <- label_codes(
  quest,
  c(
    "Emotional.support.from.family...My.family.members.express.love.and.affection.towards.me..",
    "Instrumental.support.from.friends...My.friends.offer.to.help.me.with.tasks.or.errands.when.I.need.it..",
    "Informational.support.from.teachers...My.teachers.provide.me.with.useful.information.and.guidance.about.school.and.academic.issues..",
    "Emotional.support.from.significant.other...My.significant.other.listens.to.me.and.is.supportive.when.I.need.to.talk.about.my.problems..",
    "Instrumental.support.from.family...My.family.members.help.me.with.practical.tasks.when.I.need.it..such.as.transportation.or.shopping..",
    "Informational.support.from.counselor...My.counselor.provides.me.with.helpful.information.and.guidance.about.dealing.with.stress.and.emotional.issues.."
  ),
  c(
    "Strongly Disagree",
    "Disagree",
    "Neither Agree nor Disagree",
    "Agree",
    "Strongly Agree"
  )
)

Overall Summary Statistics

# Frequency table for questionnaire columns 2 through 16.
quest[, 2:16] %>%
  tbl_summary()
Characteristic N = 3501
What.is.your.age.bracket..years..
    12-15 221 (63%)
    16-19 129 (37%)
    20-23 0 (0%)
What.is.your.gender.
    Male 199 (57%)
    Female 151 (43%)
What.is.your.ethnic.racial.background.
    Kikuyu 36 (10%)
    Meru 171 (49%)
    Embu 143 (41%)
What.is.your.current.grade.level.in.school.
    Junior Secondary 179 (51%)
    Secondary 102 (29%)
    Senior Secondary 69 (20%)
What.is.your.living.situation..with.parents..guardians..relatives..etc...
    Parent 97 (28%)
    Guardian 183 (52%)
    Relative 70 (20%)
Have.you.ever.witnessed.domestic.violence.in.your.household. 274 (78%)
Have.you.ever.been.the.victim.of.domestic.violence. 239 (68%)
Have.you.ever.witnessed.domestic.violence.outside.of.your.household..e.g...at.a.friend.s.house.or.in.public.. 203 (58%)
How.often.does.domestic.violence.occur.in.your.household.
    Never 100 (29%)
    Rarely 66 (19%)
    Sometimes 84 (24%)
    Always 75 (21%)
    Often 25 (7.1%)
How.has.domestic.violence.affected.your.daily.life.and.relationships..Social.Behavior.
    Not at all affected 79 (23%)
    Slightly affected 66 (19%)
    Moderately affected 80 (23%)
    Very affected 93 (27%)
    Extremely affected 32 (9.1%)
How.often.do.you.socialize.with.your.peers.outside.of.school.
    Never 71 (20%)
    Rarely 72 (21%)
    Sometimes 90 (26%)
    Always 85 (24%)
    Often 32 (9.1%)
Have.you.ever.felt.excluded.or.isolated.from.your.peer.group. 239 (68%)
Do.you.feel.comfortable.talking.to.your.parents.or.guardians.about.your.social.life. 203 (58%)
How.often.do.you.feel.lonely.or.isolated.
    Never 82 (23%)
    Rarely 73 (21%)
    Sometimes 93 (27%)
    Always 77 (22%)
    Often 25 (7.1%)
How.has.domestic.violence.affected.your.social.behavior.and.relationships.
    Not at all affected 107 (31%)
    Slightly affected 81 (23%)
    Moderately affected 78 (22%)
    Very affected 66 (19%)
    Extremely affected 18 (5.1%)
1 n (%)