Research Methods & Data Analyses

AUTHOR

Jal Vashi (N0990629)

Week 1 - Introduction

Formative exercise - Making your workbook online

After 100s of tries, I finally published the workbook. :)

Week 2 - Tutorial

It was nice to understand and troubleshoot R and R Studio.

library(tidyverse)
Warning: package 'tidyverse' was built under R version 4.1.2
Warning: package 'tibble' was built under R version 4.1.2
Warning: package 'forcats' was built under R version 4.1.2
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(palmerpenguins)
Warning: package 'palmerpenguins' was built under R version 4.1.2
# Load the Palmer penguins data and preview its first five columns
# (species through flipper_length_mm).
data("penguins")
penguins %>%
  select(species:flipper_length_mm)
# A tibble: 344 × 5
   species island    bill_length_mm bill_depth_mm flipper_length_mm
   <fct>   <fct>              <dbl>         <dbl>             <int>
 1 Adelie  Torgersen           39.1          18.7               181
 2 Adelie  Torgersen           39.5          17.4               186
 3 Adelie  Torgersen           40.3          18                 195
 4 Adelie  Torgersen           NA            NA                  NA
 5 Adelie  Torgersen           36.7          19.3               193
 6 Adelie  Torgersen           39.3          20.6               190
 7 Adelie  Torgersen           38.9          17.8               181
 8 Adelie  Torgersen           39.2          19.6               195
 9 Adelie  Torgersen           34.1          18.1               193
10 Adelie  Torgersen           42            20.2               190
# ℹ 334 more rows
# Histogram of bill length, coloured and filled by species.
# ggplot2 derives its groups from the aesthetics (color / fill), so a dplyr
# group_by() before ggplot() has no effect on the plot and is left out.
data("penguins")
penguins %>%
  ggplot(aes(x = bill_length_mm, color = species, fill = species)) +
  geom_histogram()
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning: Removed 2 rows containing non-finite outside the scale range
(`stat_bin()`).

library(tidyverse)
library(palmerpenguins)

# Box plot of bill length for each species, with enlarged axis text.
# The species split comes from the x / color / fill aesthetics, so no dplyr
# group_by() is needed before ggplot().
data("penguins")
penguins %>%
  ggplot(
    aes(
      x = species,
      y = bill_length_mm,
      color = species,
      fill = species
    )
  ) +
  geom_boxplot(alpha = 0.5) +
  theme(
    axis.text = element_text(size = 16),
    axis.title = element_text(size = 16)
  )
Warning: Removed 2 rows containing non-finite outside the scale range
(`stat_boxplot()`).

library(vtable)
Loading required package: kableExtra

Attaching package: 'kableExtra'

The following object is masked from 'package:dplyr':

    group_rows
library(gt)

# Mean, standard deviation and count of bill length for each species.
# NOTE: na.omit() drops every row that has an NA in *any* column, not only in
# bill_length_mm, which is why 333 of the 344 penguins are summarised here.
penguins %>%
  na.omit() %>%
  group_by(species) %>%
  summarise(
    mean = mean(bill_length_mm),
    sd = sd(bill_length_mm),
    n = n()
  )
# A tibble: 3 × 4
  species    mean    sd     n
  <fct>     <dbl> <dbl> <int>
1 Adelie     38.8  2.66   146
2 Chinstrap  48.8  3.34    68
3 Gentoo     47.6  3.11   119

Week 3 - Diamonds Exercise 6.7

library(tidyverse)


# Summary statistics of `poptotal` for each state in the `midwest` data:
# mean, median, max, min, number of distinct values, first value, and two
# flags for whether any value is below 5,000 or above 2,000,000.
midwest %>%
  group_by(state) %>%
  summarise(
    poptotalmean = mean(poptotal),
    poptotalmed = median(poptotal),
    popmax = max(poptotal),
    popmin = min(poptotal),
    popdistinct = n_distinct(poptotal),
    popfirst = first(poptotal),
    popany = any(poptotal < 5000),
    popany2 = any(poptotal > 2000000)
  ) %>%
  ungroup()
# A tibble: 5 × 9
  state poptotalmean poptotalmed  popmax popmin popdistinct popfirst popany
  <chr>        <dbl>       <dbl>   <int>  <int>       <int>    <int> <lgl> 
1 IL         112065.      24486. 5105067   4373         101    66090 TRUE  
2 IN          60263.      30362.  797159   5315          92    31095 FALSE 
3 MI         111992.      37308  2111687   1701          83    10145 TRUE  
4 OH         123263.      54930. 1412140  11098          88    25371 FALSE 
5 WI          67941.      33528   959275   3890          72    15682 TRUE  
# ℹ 1 more variable: popany2 <lgl>

Week 3 Diamonds Exercise 6.7 (1)

library(tidyverse) 
# Inspect the full diamonds data set. view() is called only for its side
# effect (presumably opening the data viewer in an interactive session —
# TODO confirm); it prints nothing in the rendered workbook.
view(diamonds) 

Week 3 Diamonds Exercise 6.7 (2)

library(tidyverse) 
library(dplyr)

# Sort diamonds from most to least expensive, breaking price ties by cut.
# Each arrange() re-sorts the whole table, so only the last call in a chain
# decides the final order; the earlier, superseded sorts are dropped.
# The pipeline also starts from the data itself rather than from view(), which
# is a side-effect call and does not belong at the head of a transformation.
diamonds %>%
  arrange(desc(price), cut)
# A tibble: 53,940 × 10
   carat cut       color clarity depth table price     x     y     z
   <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
 1  2.29 Premium   I     VS2      60.8    60 18823  8.5   8.47  5.16
 2  2    Very Good G     SI1      63.5    56 18818  7.9   7.97  5.04
 3  1.51 Ideal     G     IF       61.7    55 18806  7.37  7.41  4.56
 4  2.07 Ideal     G     SI2      62.5    55 18804  8.2   8.13  5.11
 5  2    Very Good H     SI1      62.8    57 18803  7.95  8     5.01
 6  2.29 Premium   I     SI1      61.8    59 18797  8.52  8.45  5.24
 7  2.04 Premium   H     SI1      58.1    60 18795  8.37  8.28  4.84
 8  2    Premium   I     VS1      60.8    59 18795  8.13  8.02  4.91
 9  1.71 Premium   F     VS2      62.3    59 18791  7.57  7.53  4.7 
10  2.15 Ideal     G     SI2      62.6    54 18791  8.29  8.35  5.21
# ℹ 53,930 more rows

Week 3 Diamonds Exercise 6.7 (3)

# Order diamonds by clarity grade and, within each grade, from most to least
# expensive. Both sort keys go in one arrange() call instead of relying on two
# consecutive sorts, and the pipeline starts from the data rather than from
# the side-effect call view().
diamonds %>%
  arrange(clarity, desc(price))
# A tibble: 53,940 × 10
   carat cut       color clarity depth table price     x     y     z
   <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
 1  4.5  Fair      J     I1       65.8    58 18531 10.2  10.2   6.72
 2  5.01 Fair      J     I1       65.5    59 18018 10.7  10.5   6.98
 3  4.13 Fair      H     I1       64.8    61 17329 10     9.85  6.43
 4  3.01 Ideal     J     I1       65.4    60 16538  8.99  8.93  5.86
 5  3.67 Premium   I     I1       62.4    56 16193  9.86  9.81  6.13
 6  4    Very Good I     I1       63.3    58 15984 10.0   9.94  6.31
 7  3.4  Fair      D     I1       66.8    52 15964  9.42  9.34  6.27
 8  4.01 Premium   I     I1       61      61 15223 10.1  10.1   6.17
 9  4.01 Premium   J     I1       62.5    62 15223 10.0   9.94  6.24
10  3    Premium   G     I1       59.7    60 13203  9.42  9.26  5.58
# ℹ 53,930 more rows

Week 3 Diamonds Exercise 6.7 (4)

library(tidyverse) 
library(dplyr)

# Add three columns to diamonds: a price reduced by 250, a constant character
# column and a constant logical column.
diamonds %>%
  mutate(
    salePrice = price - 250,
    Values = "something",
    Simple = TRUE
  )
# A tibble: 53,940 × 13
   carat cut       color clarity depth table price     x     y     z salePrice
   <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>     <dbl>
 1  0.23 Ideal     E     SI2      61.5    55   326  3.95  3.98  2.43        76
 2  0.21 Premium   E     SI1      59.8    61   326  3.89  3.84  2.31        76
 3  0.23 Good      E     VS1      56.9    65   327  4.05  4.07  2.31        77
 4  0.29 Premium   I     VS2      62.4    58   334  4.2   4.23  2.63        84
 5  0.31 Good      J     SI2      63.3    58   335  4.34  4.35  2.75        85
 6  0.24 Very Good J     VVS2     62.8    57   336  3.94  3.96  2.48        86
 7  0.24 Very Good I     VVS1     62.3    57   336  3.95  3.98  2.47        86
 8  0.26 Very Good H     SI1      61.9    55   337  4.07  4.11  2.53        87
 9  0.22 Fair      E     VS2      65.1    61   337  3.87  3.78  2.49        87
10  0.23 Very Good H     VS1      59.4    61   338  4     4.05  2.39        88
# ℹ 53,930 more rows
# ℹ 2 more variables: Values <chr>, Simple <lgl>

Week 3 Diamonds Exercise 6.7 (5)

library(tidyverse) 
library(dplyr)

# Keep every column except the three dimension columns x, y and z.
diamonds %>%
  select(-c(x, y, z))
# A tibble: 53,940 × 7
   carat cut       color clarity depth table price
   <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int>
 1  0.23 Ideal     E     SI2      61.5    55   326
 2  0.21 Premium   E     SI1      59.8    61   326
 3  0.23 Good      E     VS1      56.9    65   327
 4  0.29 Premium   I     VS2      62.4    58   334
 5  0.31 Good      J     SI2      63.3    58   335
 6  0.24 Very Good J     VVS2     62.8    57   336
 7  0.24 Very Good I     VVS1     62.3    57   336
 8  0.26 Very Good H     SI1      61.9    55   337
 9  0.22 Fair      E     VS2      65.1    61   337
10  0.23 Very Good H     VS1      59.4    61   338
# ℹ 53,930 more rows

Week 3 Diamonds Exercise 6.7 (6)

library(tidyverse) 
library(dplyr)

# Mean carat for each cut grade; the result has one row per cut and the
# mean in column `m`.
diamonds %>%
  group_by(cut) %>%
  summarise(
    m = mean(carat)
  )
# A tibble: 5 × 2
  cut           m
  <ord>     <dbl>
1 Fair      1.05 
2 Good      0.849
3 Very Good 0.806
4 Premium   0.892
5 Ideal     0.703

Week 3 Diamonds Exercise 6.7 (7)

# Count the rows of the data flowing through the pipe.
# n() counts the rows summarise() actually receives, whereas nrow(diamonds)
# always reads the global `diamonds` object and would give the wrong answer
# as soon as a filter() or group_by() is added upstream.
diamonds %>%
  summarise(totalNum = n())
# A tibble: 1 × 1
  totalNum
     <int>
1    53940

Week 3 Diamonds (Good Ques, Bad Ques)

Good Ques: How do cut and clarity influence the price of a diamond?

Bad Ques: Is there any difference between cuts of diamonds?

Week 4 Data visualisation

library(tidyverse) 
library(dplyr)

# Base-graphics scatter plot of mtcars: wt on the x-axis, mpg on the y-axis.
plot (mtcars$wt, mtcars$mpg)

library(readxl)

# Read the SPAD / PPFD workbook; the relative path means the file must sit in
# the working directory. Columns 2-14 have no header in the sheet, so they are
# given the placeholder names `...2` to `...14` (see the "New names" message).
data <- read_excel("SPAD and PPFD.xlsx")
New names:
• `` -> `...2`
• `` -> `...3`
• `` -> `...4`
• `` -> `...5`
• `` -> `...6`
• `` -> `...7`
• `` -> `...8`
• `` -> `...9`
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`

`

Scientific methods.

  • The scientific method is an empirical method for gaining knowledge.
  • It involves careful observation and interpretation of these observations.
  • It involves forming a hypothesis and testing it through reasoning and experiment.

Figure labels (Sinhala / Cantonese / English):

  • නිරීක්ෂණය/ ගැටළුව / 觀察/ 問問題 / Observation / question
  • මාතෘකාවගවේෂණය / 研究吓目標課題 / Research topic area
  • කල්පිතය / 提出假說 / Hypothesis
  • පරීක්ෂණය / 用實驗嚟驗證 / Test with experiment
  • දත්තවිශ්ලේෂණය / 分析吓啲 data / Analyze data
  • අවසානනිගමනය / 做報告總結 / Report conclusions
  • විද්‍යාත්මකක්‍රමය / 科學方法 / Scientific method

  • Before an experiment can be run, knowledge must be developed through characterizations, hypotheses, and predictions; experiments are then necessary to test them.

  • The scientific method can be thought of as the following steps:

    • Define a question
    • Gather information and resources
    • Form a hypothesis
    • Test the hypothesis by performing an experiment and collecting data in a reproducible manner (for future testing)
    • Analyze the data
    • Interpret the data and draw conclusions that serve as a starting point for a new hypothesis
    • Publish results
    • Retest (frequently done by other scientists to find knowledge gaps)

How to write a strong hypothesis.

A good hypothesis should satisfy the following criteria:

  • It should review and identify a knowledge gap in the field of study.
  • The study should be replicable and give the same results when the conditions are set the same.
  • The statement should be brief and not too descriptive.
  • The statement should provide a clear notion regarding the knowledge gap.
  • It should have a null and an alternative hypothesis. The alternative hypothesis is the opposite of the null hypothesis.

Week 5 How to choose the correct analyses & Hypothetico-Deductive reasoning

library(tidyverse)
library(dplyr)
library(modeldata)

Attaching package: 'modeldata'
The following object is masked _by_ '.GlobalEnv':

    penguins
The following object is masked from 'package:palmerpenguins':

    penguins
# Label each iris flower "small" when its sepal length is strictly below the
# overall median, otherwise "big" (a flower exactly at the median is "big").
data("iris")

iris %>%
  mutate(
    size = if_else(
      Sepal.Length < median(Sepal.Length),
      "small",
      "big"
    )
  )
    Sepal.Length Sepal.Width Petal.Length Petal.Width    Species  size
1            5.1         3.5          1.4         0.2     setosa small
2            4.9         3.0          1.4         0.2     setosa small
3            4.7         3.2          1.3         0.2     setosa small
4            4.6         3.1          1.5         0.2     setosa small
5            5.0         3.6          1.4         0.2     setosa small
6            5.4         3.9          1.7         0.4     setosa small
7            4.6         3.4          1.4         0.3     setosa small
8            5.0         3.4          1.5         0.2     setosa small
9            4.4         2.9          1.4         0.2     setosa small
10           4.9         3.1          1.5         0.1     setosa small
11           5.4         3.7          1.5         0.2     setosa small
12           4.8         3.4          1.6         0.2     setosa small
13           4.8         3.0          1.4         0.1     setosa small
14           4.3         3.0          1.1         0.1     setosa small
15           5.8         4.0          1.2         0.2     setosa   big
16           5.7         4.4          1.5         0.4     setosa small
17           5.4         3.9          1.3         0.4     setosa small
18           5.1         3.5          1.4         0.3     setosa small
19           5.7         3.8          1.7         0.3     setosa small
20           5.1         3.8          1.5         0.3     setosa small
21           5.4         3.4          1.7         0.2     setosa small
22           5.1         3.7          1.5         0.4     setosa small
23           4.6         3.6          1.0         0.2     setosa small
24           5.1         3.3          1.7         0.5     setosa small
25           4.8         3.4          1.9         0.2     setosa small
26           5.0         3.0          1.6         0.2     setosa small
27           5.0         3.4          1.6         0.4     setosa small
28           5.2         3.5          1.5         0.2     setosa small
29           5.2         3.4          1.4         0.2     setosa small
30           4.7         3.2          1.6         0.2     setosa small
31           4.8         3.1          1.6         0.2     setosa small
32           5.4         3.4          1.5         0.4     setosa small
33           5.2         4.1          1.5         0.1     setosa small
34           5.5         4.2          1.4         0.2     setosa small
35           4.9         3.1          1.5         0.2     setosa small
36           5.0         3.2          1.2         0.2     setosa small
37           5.5         3.5          1.3         0.2     setosa small
38           4.9         3.6          1.4         0.1     setosa small
39           4.4         3.0          1.3         0.2     setosa small
40           5.1         3.4          1.5         0.2     setosa small
41           5.0         3.5          1.3         0.3     setosa small
42           4.5         2.3          1.3         0.3     setosa small
43           4.4         3.2          1.3         0.2     setosa small
44           5.0         3.5          1.6         0.6     setosa small
45           5.1         3.8          1.9         0.4     setosa small
46           4.8         3.0          1.4         0.3     setosa small
47           5.1         3.8          1.6         0.2     setosa small
48           4.6         3.2          1.4         0.2     setosa small
49           5.3         3.7          1.5         0.2     setosa small
50           5.0         3.3          1.4         0.2     setosa small
51           7.0         3.2          4.7         1.4 versicolor   big
52           6.4         3.2          4.5         1.5 versicolor   big
53           6.9         3.1          4.9         1.5 versicolor   big
54           5.5         2.3          4.0         1.3 versicolor small
55           6.5         2.8          4.6         1.5 versicolor   big
56           5.7         2.8          4.5         1.3 versicolor small
57           6.3         3.3          4.7         1.6 versicolor   big
58           4.9         2.4          3.3         1.0 versicolor small
59           6.6         2.9          4.6         1.3 versicolor   big
60           5.2         2.7          3.9         1.4 versicolor small
61           5.0         2.0          3.5         1.0 versicolor small
62           5.9         3.0          4.2         1.5 versicolor   big
63           6.0         2.2          4.0         1.0 versicolor   big
64           6.1         2.9          4.7         1.4 versicolor   big
65           5.6         2.9          3.6         1.3 versicolor small
66           6.7         3.1          4.4         1.4 versicolor   big
67           5.6         3.0          4.5         1.5 versicolor small
68           5.8         2.7          4.1         1.0 versicolor   big
69           6.2         2.2          4.5         1.5 versicolor   big
70           5.6         2.5          3.9         1.1 versicolor small
71           5.9         3.2          4.8         1.8 versicolor   big
72           6.1         2.8          4.0         1.3 versicolor   big
73           6.3         2.5          4.9         1.5 versicolor   big
74           6.1         2.8          4.7         1.2 versicolor   big
75           6.4         2.9          4.3         1.3 versicolor   big
76           6.6         3.0          4.4         1.4 versicolor   big
77           6.8         2.8          4.8         1.4 versicolor   big
78           6.7         3.0          5.0         1.7 versicolor   big
79           6.0         2.9          4.5         1.5 versicolor   big
80           5.7         2.6          3.5         1.0 versicolor small
81           5.5         2.4          3.8         1.1 versicolor small
82           5.5         2.4          3.7         1.0 versicolor small
83           5.8         2.7          3.9         1.2 versicolor   big
84           6.0         2.7          5.1         1.6 versicolor   big
85           5.4         3.0          4.5         1.5 versicolor small
86           6.0         3.4          4.5         1.6 versicolor   big
87           6.7         3.1          4.7         1.5 versicolor   big
88           6.3         2.3          4.4         1.3 versicolor   big
89           5.6         3.0          4.1         1.3 versicolor small
90           5.5         2.5          4.0         1.3 versicolor small
91           5.5         2.6          4.4         1.2 versicolor small
92           6.1         3.0          4.6         1.4 versicolor   big
93           5.8         2.6          4.0         1.2 versicolor   big
94           5.0         2.3          3.3         1.0 versicolor small
95           5.6         2.7          4.2         1.3 versicolor small
96           5.7         3.0          4.2         1.2 versicolor small
97           5.7         2.9          4.2         1.3 versicolor small
98           6.2         2.9          4.3         1.3 versicolor   big
99           5.1         2.5          3.0         1.1 versicolor small
100          5.7         2.8          4.1         1.3 versicolor small
101          6.3         3.3          6.0         2.5  virginica   big
102          5.8         2.7          5.1         1.9  virginica   big
103          7.1         3.0          5.9         2.1  virginica   big
104          6.3         2.9          5.6         1.8  virginica   big
105          6.5         3.0          5.8         2.2  virginica   big
106          7.6         3.0          6.6         2.1  virginica   big
107          4.9         2.5          4.5         1.7  virginica small
108          7.3         2.9          6.3         1.8  virginica   big
109          6.7         2.5          5.8         1.8  virginica   big
110          7.2         3.6          6.1         2.5  virginica   big
111          6.5         3.2          5.1         2.0  virginica   big
112          6.4         2.7          5.3         1.9  virginica   big
113          6.8         3.0          5.5         2.1  virginica   big
114          5.7         2.5          5.0         2.0  virginica small
115          5.8         2.8          5.1         2.4  virginica   big
116          6.4         3.2          5.3         2.3  virginica   big
117          6.5         3.0          5.5         1.8  virginica   big
118          7.7         3.8          6.7         2.2  virginica   big
119          7.7         2.6          6.9         2.3  virginica   big
120          6.0         2.2          5.0         1.5  virginica   big
121          6.9         3.2          5.7         2.3  virginica   big
122          5.6         2.8          4.9         2.0  virginica small
123          7.7         2.8          6.7         2.0  virginica   big
124          6.3         2.7          4.9         1.8  virginica   big
125          6.7         3.3          5.7         2.1  virginica   big
126          7.2         3.2          6.0         1.8  virginica   big
127          6.2         2.8          4.8         1.8  virginica   big
128          6.1         3.0          4.9         1.8  virginica   big
129          6.4         2.8          5.6         2.1  virginica   big
130          7.2         3.0          5.8         1.6  virginica   big
131          7.4         2.8          6.1         1.9  virginica   big
132          7.9         3.8          6.4         2.0  virginica   big
133          6.4         2.8          5.6         2.2  virginica   big
134          6.3         2.8          5.1         1.5  virginica   big
135          6.1         2.6          5.6         1.4  virginica   big
136          7.7         3.0          6.1         2.3  virginica   big
137          6.3         3.4          5.6         2.4  virginica   big
138          6.4         3.1          5.5         1.8  virginica   big
139          6.0         3.0          4.8         1.8  virginica   big
140          6.9         3.1          5.4         2.1  virginica   big
141          6.7         3.1          5.6         2.4  virginica   big
142          6.9         3.1          5.1         2.3  virginica   big
143          5.8         2.7          5.1         1.9  virginica   big
144          6.8         3.2          5.9         2.3  virginica   big
145          6.7         3.3          5.7         2.5  virginica   big
146          6.7         3.0          5.2         2.3  virginica   big
147          6.3         2.5          5.0         1.9  virginica   big
148          6.5         3.0          5.2         2.0  virginica   big
149          6.2         3.4          5.4         2.3  virginica   big
150          5.9         3.0          5.1         1.8  virginica   big