# Load the aliens data set from CSV, reading text columns as factors.
aliens <- read.csv("aliens.csv", header = TRUE, stringsAsFactors = TRUE)
library(skimr)
#' Draw a reproducible per-student sample of rows from a data frame
#'
#' Pins the random number generator to its R 3.2.1 behaviour and seeds it with
#' `studentID`, so the same ID always produces the same sample.
#'
#' @param studentID Number passed to `set.seed()`.
#' @param n Number of row numbers to draw.
#' @param data Data frame with an `ID` column; rows whose `ID` is among the
#'   drawn numbers are kept.
#' @return The matching rows of `data`, in their original row order.
make.my.sample <- function(studentID, n, data) {
  # NOTE: RNGversion() and set.seed() change the session-wide RNG state.
  RNGversion("3.2.1")
  set.seed(studentID)
  # seq_len() stays correct for zero-row input, where 1:nrow(data) is c(1, 0).
  sample_values <- sample(seq_len(nrow(data)), size = n)
  # Drawn row numbers are matched against ID, which assumes ID runs from 1 to
  # nrow(data) -- TODO confirm before reusing with other data sets.
  data[data$ID %in% sample_values, ]
}
# Draw the fixed 30-alien sample for this student ID; warnings raised by the
# call are suppressed.
my_sample <- suppressWarnings(
  make.my.sample(studentID = 33002176, n = 30, data = aliens)
)

Question 1

# Basic arithmetic: addition, subtraction, multiplication, division.
2 + 3
## [1] 5
7 - 3
## [1] 4
8 * 8
## [1] 64
96 / 6
## [1] 16

Question 2

# Preview the first six rows of the full data set.
head(aliens)
##   ID age color island  college income antennae    politics anxiety depression
## 1  1  33  Blue  Blick Ganymede  27000    Curly Republicant      46         92
## 2  2  47  Pink  Plume Ganymede 124000 Straight Independone      49         94
## 3  3  39  Pink  Plume       Io  43000 Straight Democrulite      51        119
## 4  4  24  Pink  Blick       Io  46000 Straight Republicant      45         92
## 5  5  53  Pink  Blick       Io  44000 Straight Democrulite      46         93
## 6  6  36  Blue  Blick   Europa  28000    Curly Republicant      49         98
##   sociable control memory intelligence time1 time2 time3 food1 sleep food2
## 1      108      68     94          119  5.86  4.36  4.11     5   6.0     9
## 2      110      72    109          127  5.07  4.35  4.97     8   7.8    11
## 3       79      62     83          112  5.66  6.13  6.15     7   4.4     9
## 4      117      65     88          115  7.81  8.13  6.12     6   6.0     9
## 5      109      56    106          122  5.04  4.55  4.15     8   4.8     9
## 6      101      49    103          104  4.81  3.65  5.11    10   5.5     7
##   reasoning_trials
## 1                1
## 2                1
## 3                1
## 4                1
## 5                1
## 6                1
# Preview the first 20 rows of the full data set.
head(aliens, n = 20)
##    ID age color     island  college income antennae    politics anxiety
## 1   1  33  Blue      Blick Ganymede  27000    Curly Republicant      46
## 2   2  47  Pink      Plume Ganymede 124000 Straight Independone      49
## 3   3  39  Pink      Plume       Io  43000 Straight Democrulite      51
## 4   4  24  Pink      Blick       Io  46000 Straight Republicant      45
## 5   5  53  Pink      Blick       Io  44000 Straight Democrulite      46
## 6   6  36  Blue      Blick   Europa  28000    Curly Republicant      49
## 7   7  58  Pink Nanspucket   Europa  29000    Curly Democrulite      60
## 8   8  25  Pink      Blick       Io  37000 Straight Republicant      51
## 9   9  38  Pink Nanspucket   Europa  35000 Straight Democrulite      52
## 10 10  40  Pink      Plume   Europa  33000 Straight Independone      48
## 11 11  48  Blue      Blick Ganymede  46000 Straight Republicant      58
## 12 12  56  Pink Nanspucket   Europa  71000 Straight Democrulite      56
## 13 13  44  Blue Nanspucket   Europa  25000 Straight Independone      49
## 14 14  40  Pink      Plume       Io  33000 Straight Independone      45
## 15 15  61  Pink Nanspucket   Europa  38000 Straight Democrulite      52
## 16 16  14  Pink Nanspucket Callisto  28000 Straight Independone      58
## 17 17  62  Pink      Blick Callisto  51000 Straight Democrulite      49
## 18 18  59  Pink      Plume Callisto  85000 Straight Independone      48
## 19 19  47  Pink      Plume       Io  53000 Straight Democrulite      43
## 20 20  40  Pink      Plume Callisto  60000    Curly Republicant      47
##    depression sociable control memory intelligence time1 time2 time3 food1
## 1          92      108      68     94          119  5.86  4.36  4.11     5
## 2          94      110      72    109          127  5.07  4.35  4.97     8
## 3         119       79      62     83          112  5.66  6.13  6.15     7
## 4          92      117      65     88          115  7.81  8.13  6.12     6
## 5          93      109      56    106          122  5.04  4.55  4.15     8
## 6          98      101      49    103          104  4.81  3.65  5.11    10
## 7         107       89      42     71           86  4.88  4.09  4.35    13
## 8          99      101      55     94          116  4.24  3.80  2.69     7
## 9          83      105      68    102          108  4.69  3.35  3.89     7
## 10        106       93      56    101          104  4.78  3.89  3.04    11
## 11        109       94      56     93          115  5.27  5.37  4.37     9
## 12        116       82      61     78           94  9.43  9.92  8.58     9
## 13         98      102      74     96          106  8.82  7.22  8.69     7
## 14        100      100      56     95          117  4.99  3.49  5.48    12
## 15        106       90      40     77           88  4.98  3.52  4.91     6
## 16         99      101      82     86          104  5.26  5.03  4.02    10
## 17        107       92      56     84           96  8.87  9.28  9.17     7
## 18        121       72      61     88           99  4.93  3.03  4.30    13
## 19         83      123      54     93          115  6.13  5.10  6.46    10
## 20         86      103      65     88          100  4.79  5.04  3.73     9
##    sleep food2 reasoning_trials
## 1    6.0     9                1
## 2    7.8    11                1
## 3    4.4     9                1
## 4    6.0     9                1
## 5    4.8     9                1
## 6    5.5     7                1
## 7    4.0     8                1
## 8    6.6     9                1
## 9    3.2     6                1
## 10   5.7     7                1
## 11   6.4     8                4
## 12   6.1    12                2
## 13   6.9     7                1
## 14   7.5     7                4
## 15   5.3     8                5
## 16   5.6     6                3
## 17   7.6     9                2
## 18   6.5    10                2
## 19   6.9     8                2
## 20   5.1     8                4
# Show the last six rows of the full data set.
tail(aliens)
##          ID age color     island  college income antennae    politics anxiety
## 9995   9995  54  Pink Nanspucket Ganymede 176000 Straight Democrulite      48
## 9996   9996  66  Blue      Blick   Europa  52000 Straight Republicant      52
## 9997   9997  33  Pink      Plume Callisto  89000 Straight Republicant      53
## 9998   9998  60  Pink Nanspucket Callisto  23000 Straight Independone      51
## 9999   9999  51  Blue      Blick   Europa  37000 Straight Republicant      39
## 10000 10000  24  Pink Nanspucket       Io  14000 Straight Democrulite      49
##       depression sociable control memory intelligence time1 time2 time3 food1
## 9995          90      115      73     84          115 11.89 11.12  9.91     7
## 9996         110       80      57     91          100  4.79  2.92  2.95     6
## 9997         108       92      74     99          108  3.71  2.88  3.89    11
## 9998         107       89      75     87          102  3.40  3.61  3.76     7
## 9999          92      108      64    106          109  6.58  6.45  5.18     6
## 10000         99      101      69    104          124  5.13  3.32  3.47     8
##       sleep food2 reasoning_trials
## 9995    6.2     8                4
## 9996    5.4     7                4
## 9997    5.5     5                2
## 9998    3.9    10                3
## 9999    5.9     9                2
## 10000   6.8     7                1

Question 3

10000 individuals are represented in this dataframe.

Question 4

# Check how R stores age and the two categorical columns.
class(aliens$age)
## [1] "integer"
class(aliens$politics)
## [1] "factor"
class(aliens$college)
## [1] "factor"

Both college and politics are categorical factors.

# Check how R stores the sleep and income columns.
class(aliens$sleep)
## [1] "numeric"
class(aliens$income)
## [1] "numeric"

Both sleep and income are numerical variables. I believe both would be considered continuous, because they can take any value within a range rather than only a fixed set of categories. I believe they are best regarded as ratio variables rather than interval variables, because each has a true zero (no sleep, no income), so statements such as "twice as much income" are meaningful.

Question 5

# Numerical summary (min, quartiles, median, mean, max) of age for all aliens.
summary(aliens$age)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   10.00   26.00   40.00   40.21   55.00   70.00

Based on the output above, I believe the summary function gives a quick numerical overview of a variable (its minimum, quartiles, median, mean, and maximum) without having to produce a chart such as a box plot.

Question 6

# Number of aliens on each island in the full data set.
summary(aliens$island)
##      Blick Nanspucket      Plume 
##       3504       3032       3464

Most aliens come from the Island Blick.

# Number of aliens in each political party in the full data set.
summary(aliens$politics)
## Democrulite Independone Republicant 
##        3218        3452        3330

The most popular political party is Independone.

# Numerical summary of sociability scores in the full data set.
summary(aliens$sociable)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   35.00   94.00  100.00   99.99  106.00  167.00

The highest sociability score obtained by any alien is 167.00.

# Numerical summary of memory scores in the full data set.
summary(aliens$memory)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   52.00   85.00   92.00   92.06   99.00  129.00

The lowest memory score obtained by any alien is 52.00.

Question 7

# Number of sampled aliens on each island.
summary(my_sample$island)
##      Blick Nanspucket      Plume 
##          9          9         12

Based on my sample, most aliens come from the Island Plume.

# Number of sampled aliens in each political party.
summary(my_sample$politics)
## Democrulite Independone Republicant 
##          11           8          11

Based on my sample, the most popular political parties are Democrulite and Republicant, which are tied at 11 aliens each.

# Numerical summary of sociability scores in the sample.
summary(my_sample$sociable)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    66.0    96.0   100.0   100.5   106.5   133.0

Based on my sample, the highest sociability score obtained by any alien is 133.0.

# Numerical summary of memory scores in the sample.
summary(my_sample$memory)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    67.0    84.0    91.0    90.1    96.0   113.0

Based on my sample, the lowest memory score among the aliens is 67.0.

Question 8

# Frequency table of college for the sampled aliens, then print it.
college.table <- table(my_sample$college)
college.table
## 
## Callisto   Europa Ganymede       Io 
##        6       11        6        7

A summary of this data would be that based on my personal sample of data, 6 aliens attend the college of Callisto, 11 aliens attend the college of Europa, 6 aliens attend the college of Ganymede, and 7 aliens attend the college of Io.

# Frequency table of political party for the sampled aliens, then print it.
politics.table <- table(my_sample$politics)
politics.table
## 
## Democrulite Independone Republicant 
##          11           8          11

A summary of this data would be that, based on my personal sample of data, 11 aliens are Democrulite, 8 aliens are Independone, and 11 aliens are Republicant.

# Frequency table of antenna type for the sampled aliens, then print it.
antennae.table <- table(my_sample$antennae)
antennae.table
## 
##    Curly Straight 
##        2       28

A summary of this data would be that, based on my personal sample, 2 of the aliens have curly antennae and 28 aliens have straight antennae.

Question 9

# One bar plot per frequency table: college, politics, then antennae.
barplot(college.table)

barplot(politics.table)

barplot(antennae.table)

Question 10

# Clustered bar plot of political party within each college. The second
# positional argument of barplot() is `width`, so passing two separate tables
# only changes the bar widths; the two variables are cross-tabulated instead.
barplot(table(my_sample$politics, my_sample$college),
        beside = TRUE, legend.text = TRUE)

# Clustered bar plot of antenna type within each college.
barplot(table(my_sample$antennae, my_sample$college),
        beside = TRUE, legend.text = TRUE)

Question 11

# Centre (mean, median) and spread (variance, standard deviation) of anxiety.
mean(aliens$anxiety)
## [1] 49.9967
median(aliens$anxiety)
## [1] 50
var(aliens$anxiety)
## [1] 25.0708
sd(aliens$anxiety)
## [1] 5.007075
# Same four statistics for income.
mean(aliens$income)
## [1] 69707.9
median(aliens$income)
## [1] 55000
var(aliens$income)
## [1] 2510807258
sd(aliens$income)
## [1] 50107.96
# Same four statistics for intelligence.
mean(aliens$intelligence)
## [1] 108.5385
median(aliens$intelligence)
## [1] 109
var(aliens$intelligence)
## [1] 89.8317
sd(aliens$intelligence)
## [1] 9.477959

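When looking at the medians and means of all of the distributions, they do not seem to be much different from each other for anxiety (49.9967 vs. 50) or intelligence (108.5385 vs. 109). The one variable where they differ noticeably is income, whose mean (69707.9) is well above its median (55000). This does make sense, because incomes vary far more among the aliens than intelligence or anxiety do (the standard deviation of income is 50107.96), so a relatively small number of very high incomes can pull the mean above the median.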

Question 12

# Histogram of income, requesting 50 breaks.
hist(aliens$income, breaks = 50)

# Same histogram, requesting 10000 breaks.
hist(aliens$income, breaks = 10000)

# Same histogram, requesting a single break.
hist(aliens$income, breaks = 1)

After trying different numbers of breaks for the histogram of the income variable, I had trouble finding the best value. I tried 1, then 50, then 100, 500, 1000, and 50000, yet the x-axis of the graph still does not make sense to me.

Question 13

# Box plot of anxiety by island for the sampled aliens.
# NOTE(review): no ylim argument is passed in this call, although the write-up
# below refers to one -- confirm which version of the plot is intended.
boxplot(my_sample$anxiety~my_sample$island)

This graph includes the ylim argument in order to set a minimum and maximum on the y-axis. Doing this makes it easier to see the data clearly, without unnecessary data. When it is removed, R seems to use numbers that fit the data but do not show the bigger picture.