This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the **Knit** button, a document is generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

### Q1

#### a
# setwd("C:/Users/avivg/OneDrive/Documents/R/Statistical Analysis Course/Ex1")
# df <- read.csv("q1.csv")
# head(df)

# Q1 data, inlined as two parallel vectors: element i of AGE and element i of
# SAL belong to the same observation.
AGE <- c(53,43,33,45,46,55,41,55,36,45,55,50,49,47,69,51,48,62,45,37,50,50,50,58,53,57,53,61,47,56,44,46,58,48,38,74,60,32,51,50,40,61,63,56,45,61,70,59,57,69,44,56,50,56,43,48,52,62,48)
SAL <- c(145,621,262,208,362,424,339,736,291,58,498,643,390,332,750,368,659,234,396,300,343,536,543,217,298,1103,406,254,862,204,206,250,21,298,350,800,726,370,536,291,808,543,149,350,242,198,213,296,317,482,155,802,200,282,573,388,250,396,572)
df <- data.frame(AGE, SAL)

# Column shortcuts used throughout the rest of Q1.
age <- df$AGE
salary <- df$SAL

head(df)
## AGE SAL
## 1 53 145
## 2 43 621
## 3 33 262
## 4 45 208
## 5 46 362
## 6 55 424

# Centre of the age distribution: mean, then median.
print(mean(age))
## [1] 51.54237
print(median(age))
## [1] 50

# First few salaries, then the centre of the salary distribution. The mean
# lies above the median, i.e. the salaries are right-skewed.
print(head(salary))
## [1] 145 621 262 208 362 424
print(mean(salary))
## [1] 404.1695
print(median(salary))
## [1] 350
# 90th percentile of salary: the cut-off above which an observation belongs
# to the top 10% of earners.
quant9 <- quantile(salary, 0.9)
quant9
## 90%
## 738.8
plot(df)
# Draw the cut-off on the scatter plot. It is taken from quant9 rather than a
# hard-coded number so the line and the filter below cannot disagree.
abline(h = quant9, col = "red")
# Filter on the numeric salaries. Comparing names(age) (character) with a
# number makes R compare strings, under which "1103" sorts below "7..." and
# the highest salary is silently dropped from the result.
is_top_10 <- salary > quant9
age_10 <- age[is_top_10]
names(age_10) <- salary[is_top_10] # label each age with its salary for printing
print("Ages of top 10 %:")
## [1] "Ages of top 10 %:"
print(age_10)
## 750 1103 862 800 808 802
## 69 57 47 74 40 56
# Spread of the salaries: the sample standard deviation, followed by the
# number of observations whose salary lies strictly within one, and strictly
# within two, standard deviations of the mean salary.
sal_sd <- sd(salary)
sal_sd
## [1] 220.5335
sal_dev <- abs(salary - mean(salary)) # absolute distance from the mean
# Closer than ONE standard deviation to the mean.
print(sum(sal_dev < sal_sd))
## [1] 44
# Closer than TWO standard deviations to the mean.
print(sum(sal_dev < 2 * sal_sd))
## [1] 57
# Histograms of age: raw counts first, then densities.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
hist(age)
hist(age, freq = FALSE)
# Histograms of salary with about ten bins requested: counts, then densities.
hist(salary, breaks = 10)
hist(salary, freq = FALSE, breaks = 10)
# Scatter plot of the data frame (AGE on the x axis, SAL on the y axis).
plot(df)
title("Age VS Salary")
# Correlation between age and salary: weak and positive.
cor(df$AGE, df$SAL)
## [1] 0.1275554
# Q2 data: one row per match, read from a CSV expected in the working
# directory. NOTE: this rebinds `df`, which held the age/salary data until here.
# NOTE(review): FGM presumably is the minute of the first goal, HTFGM/ATFGM the
# home/away team's first-goal minute, and HF home fouls; confirm against the
# data dictionary.
df <- read.csv(file = "firstgoalclean.csv")
head(df)
## Div Date HomeTeam AwayTeam FTHG FTAG FTR HTHG HTAG HTR Referee
## 1 E0 15/08/09 Aston Villa Wigan 0 2 A 0 1 A M Clattenburg
## 2 E0 15/08/09 Blackburn Man City 0 2 A 0 1 A M Dean
## 3 E0 15/08/09 Bolton Sunderland 0 1 A 0 1 A A Marriner
## 4 E0 15/08/09 Chelsea Hull 2 1 H 1 1 D A Wiley
## 5 E0 15/08/09 Everton Arsenal 1 6 A 0 3 A M Halsey
## 6 E0 15/08/09 Portsmouth Fulham 0 1 A 0 1 A M Atkinson
## HS AS HST AST HF AF HC AC HY AY HR AR HTFGM ATFGM FGM
## 1 11 14 5 7 15 14 4 6 2 2 0 0 0 31 31
## 2 17 8 9 5 12 9 5 4 2 1 0 0 0 3 3
## 3 11 20 3 13 16 10 4 7 2 1 0 0 0 5 5
## 4 26 7 12 3 13 15 12 4 1 2 0 0 37 28 28
## 5 8 15 5 9 11 13 4 9 0 0 0 0 90 26 26
## 6 16 9 4 3 11 18 6 4 3 2 0 0 0 14 14
# Number of rows (matches) in the data set.
nrow(df)
## [1] 348
# a: distribution of FGM, on a fine grid of bins
hist(df$FGM, breaks = 50)
# b: distribution of HF
hist(df$HF) # looks approximately normal
# c: spread and centre of both variables (sd and mean of FGM, mean and sd of HF)
sd(df$FGM)
## [1] 21.96523
mean(df$FGM)
## [1] 29.85345
mean(df$HF)
## [1] 11.67241
sd(df$HF)
## [1] 3.5547
# d: how many observations lie within half a standard deviation of the mean
filterFGM <- abs(df$FGM - mean(df$FGM)) <= sd(df$FGM) / 2
sum(filterFGM)
## [1] 120
filterHF <- abs(df$HF - mean(df$HF)) <= sd(df$HF) / 2
sum(filterHF)
## [1] 154
# Standard scores (z-scores) of the observations selected in d;
# head() shows only the first six of them.
head((df$FGM[filterFGM] - mean(df$FGM)) / sd(df$FGM))
## [1] 0.05219849 -0.08438102 -0.17543402 -0.49411953 -0.35754002 0.18877799
head((df$HF[filterHF] - mean(df$HF)) / sd(df$HF))
## [1] 0.09215579 0.37347346 -0.18916188 -0.18916188 0.37347346 -0.18916188
# e: 40% and 70% quantiles of FGM
quantile(df$FGM, 0.4)
## 40%
## 19.8
quantile(df$FGM, 0.7)
## 70%
## 38
# Q3: effect of a linear transformation on the summary statistics of salary.
# The age/salary vectors are redefined here so this part runs on its own.
AGE <- c(53,43,33,45,46,55,41,55,36,45,55,50,49,47,69,51,48,62,45,37,50,50,50,58,53,57,53,61,47,56,44,46,58,48,38,74,60,32,51,50,40,61,63,56,45,61,70,59,57,69,44,56,50,56,43,48,52,62,48)
SAL <- c(145,621,262,208,362,424,339,736,291,58,498,643,390,332,750,368,659,234,396,300,343,536,543,217,298,1103,406,254,862,204,206,250,21,298,350,800,726,370,536,291,808,543,149,350,242,198,213,296,317,482,155,802,200,282,573,388,250,396,572)

# After-tax salary keeps 80% of the gross salary and then subtracts a flat 10.
net_share <- 0.8
flat_deduction <- 10
Sal_after_tax <- SAL * net_share - flat_deduction
names(Sal_after_tax) <- SAL # label each net salary with its gross salary

# Mean and median follow the same transformation (0.8 * x - 10) ...
mean(SAL)
## [1] 404.1695
mean(Sal_after_tax)
## [1] 313.3356
median(SAL)
## [1] 350
median(Sal_after_tax)
## [1] 270
# ... while the standard deviation is only scaled by 0.8; the shift of 10
# does not affect it.
sd(SAL)
## [1] 220.5335
sd(Sal_after_tax)
## [1] 176.4268
hist(Sal_after_tax, breaks = 20)
# NOTE: the age/salary correlation is not repeated here; `df` no longer holds
# the AGE and SAL columns at this point, so cor(df$AGE, df$SAL) would not work.