library(tidyverse)

# Load the OER Commons data that was saved as an RDS file.
t <- readr::read_rds(file = "final-oer-commons-data.rds")

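# One-time scrape of the NGSS DCI overview page that wrote standards.csv;
# kept for reference, not run.
# NOTE(review): this writes columns `standards_1`/`standards_2`, whereas the
# code below reads a `code` column from standards.csv, so the CSV headers were
# presumably renamed afterwards. Confirm.
# library(rvest)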
# standards <- "https://www.nextgenscience.org/overview-dci" %>% 
#   read_html() %>% 
#   html_nodes("#bootstrap-fieldgroup-accordion--accordion .even a") %>% 
#   html_text()
# 
# standards_1 <- standards %>% 
#   str_split("  ") %>% 
#   map_chr(~.[[1]])
# 
# standards_2 <- standards %>% 
#   str_split("  ") %>% 
#   map_chr(possibly(~.[[2]], NA))
# 
# tibble(standards_1, standards_2) %>% 
#   write_csv("standards.csv")

# NGSS lookup table. The `code` column is exposed under the name `standards`
# and lower-cased so it can serve as the join key for the standards table
# built further down.
standards <- read_csv("standards.csv")
standards <- standards %>%
  rename(standards = code) %>%
  mutate(standards = tolower(standards))

# Keep one row per `unique_links` value (the first occurrence), retaining all
# other columns.
t <- distinct(t, unique_links, .keep_all = TRUE)

# Number of de-duplicated resources.
nrow(t) # 48496
## [1] 48496

Proportion of resources that are science resources

# Distinct resources carrying at least one of the three science subject tags.
# The row count of the printed tibble is the number of science resources.
t %>%
  unnest(subject) %>%
  filter(
    subject %in% c("Applied Science", "Life Science", "Physical Science")
  ) %>%
  distinct(unique_links) # 8937
## # A tibble: 8,937 × 1
##    unique_links
##    <list>      
##  1 <chr [1]>   
##  2 <chr [1]>   
##  3 <chr [1]>   
##  4 <chr [1]>   
##  5 <chr [1]>   
##  6 <chr [1]>   
##  7 <chr [1]>   
##  8 <chr [1]>   
##  9 <chr [1]>   
## 10 <chr [1]>   
## # … with 8,927 more rows
# Science resources (8937) as a share of all de-duplicated resources (48496).
8937 / 48496 # .184
## [1] 0.1842832
### good representation of science

Proportions by science subject

# Per-subject row counts, mean views, and share of the 8937 science resources.
# A resource tagged with several science subjects contributes to each of them,
# so the shares can add up to more than 1.
t %>%
  unnest(subject) %>%
  filter(
    subject %in% c("Applied Science", "Life Science", "Physical Science")
  ) %>%
  group_by(subject) %>%
  summarise(
    n = n(),
    mean_views = mean(views),
    prop = n / 8937
  ) %>%
  arrange(desc(prop)) %>%
  knitr::kable()
subject n mean_views prop
Life Science 6208 37.05928 0.6946403
Applied Science 1950 86.62256 0.2181940
Physical Science 1940 65.21392 0.2170751
### lots of life science

Proportions by education level

# Resource counts, mean views, and share of the 8937 science resources by
# education level.
#
# NOTE: the table printed after this chunk was produced before the trimming
# and de-duplication steps below were introduced; re-run to refresh it.
t %>%
  unnest(subject) %>%
  filter(
    subject %in% c("Applied Science", "Life Science", "Physical Science")
  ) %>%
  unnest(level) %>%
  # One entry can hold several comma-separated levels; split them into rows.
  mutate(level = str_split(str_trim(level), ", ")) %>%
  unnest(level) %>%
  # Trim *before* grouping so that whitespace variants of the same level are
  # aggregated together instead of ending up as separate groups.
  mutate(level = str_trim(level)) %>%
  filter(level %in% c("Preschool", "Lower Primary",
                      "Upper Primary", "Middle School",
                      "High School", "Community College / Lower Division",
                      "College / Upper Division",
                      "Graduate / Professional",
                      "Career / Technical",
                      "Adult Education")) %>%
  # Unnesting `subject` repeats a resource once per science subject it carries.
  # The denominator (8937) counts distinct resources, so count each resource
  # at most once per level.
  distinct(unique_links, level, .keep_all = TRUE) %>%
  group_by(level) %>%
  summarize(n = n(),
            mean_views = mean(views)) %>%
  mutate(prop = n / 8937) %>%
  arrange(desc(n)) %>%
  knitr::kable()
level n mean_views prop
College / Upper Division 4893 55.41978 0.5474992
Community College / Lower Division 4847 65.80276 0.5423520
High School 2818 41.03833 0.3153183
Career / Technical 2555 17.37769 0.2858901
Graduate / Professional 1906 16.26443 0.2132707
Middle School 1875 34.66187 0.2098019
Upper Primary 1130 60.93540 0.1264406
Lower Primary 662 58.01662 0.0740741
Adult Education 206 110.87864 0.0230502
Preschool 113 75.11504 0.0126441
### different from TPT - more upper-level

interactions

# Summary statistics for the interaction columns (`stars` through `comments`)
# of the science resources.
#
# NOTE: the summary printed after this chunk (10098 rows) was produced before
# the de-duplication step below was introduced; re-run to refresh it.
t %>%
  unnest(subject) %>%
  filter(
    subject %in% c("Applied Science", "Life Science", "Physical Science")
  ) %>%
  # Unnesting `subject` repeats a resource once per science subject it carries;
  # keep one row per resource so each resource is summarised exactly once.
  distinct(unique_links, .keep_all = TRUE) %>%
  mutate(stars = as.integer(stars)) %>%
  select(stars:comments) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 10098
Number of columns 4
_______________________
Column type frequency:
numeric 4
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
stars 0 1 0.13 0.77 0 0 0 0 5 ▇▁▁▁▁
views 0 1 52.04 540.16 0 0 1 14 47425 ▇▁▁▁▁
saves 0 1 1.74 4.36 0 0 0 2 137 ▇▁▁▁▁
comments 0 1 0.04 0.40 0 0 0 0 14 ▇▁▁▁▁

materials

# Resource counts, mean views, and share of the 8937 science resources by
# material type, restricted to the whitelisted type labels below.
#
# NOTE: the table printed after this chunk was produced before the
# de-duplication step below was introduced; re-run to refresh it.
t %>%
  unnest(subject) %>%
  filter(
    subject %in% c("Applied Science", "Life Science", "Physical Science")
  ) %>%
  unnest(material_type) %>%
  # The whitelist also discards separator-only entries such as ", ", so no
  # separate filter for those is needed.
  filter(material_type %in% c(
    "Activity/Lab",
    "Assessment",
    "Case Study",
    "Data Set",
    "Diagram/Illustration",
    "Full Course",
    "Game",
    "Homework/Assignment",
    "Interactive",
    "Lecture",
    "Lecture Notes",
    "Lesson",
    "Lesson Plan",
    "Module",
    "Primary Source",
    "Reading",
    "Simulation",
    "Student Guide",
    "Syllabus",
    "Teaching/Learning Strategy",
    "Textbook",
    "Unit of Study"
  )) %>%
  # Second unnest kept from the original pipeline in case `material_type` is
  # still a list column at this point (TODO confirm; it is a no-op otherwise).
  unnest(material_type) %>%
  # Unnesting `subject` repeats a resource once per science subject it carries.
  # The denominator (8937) counts distinct resources, so count each resource
  # at most once per material type.
  distinct(unique_links, material_type, .keep_all = TRUE) %>%
  group_by(material_type) %>%
  summarize(n = n(),
            mean_views = mean(views)) %>%
  mutate(prop = n / 8937) %>%
  arrange(desc(n)) %>%
  knitr::kable()
material_type n mean_views prop
Reading 2840 4.158803 0.3177800
Activity/Lab 1694 60.649351 0.1895491
Module 860 235.158140 0.0962292
Lesson 792 15.228535 0.0886203
Case Study 788 1.659898 0.0881728
Lesson Plan 549 59.566485 0.0614300
Lecture 447 28.572707 0.0500168
Diagram/Illustration 436 44.422018 0.0487859
Full Course 254 223.826772 0.0284212
Assessment 253 47.956522 0.0283093
Simulation 219 7.602740 0.0245049
Interactive 171 23.017544 0.0191339
Textbook 94 148.053191 0.0105181
Teaching/Learning Strategy 90 82.366667 0.0100705
Homework/Assignment 86 106.813953 0.0096229
Unit of Study 85 55.435294 0.0095110
Lecture Notes 64 55.578125 0.0071612
Data Set 48 32.604167 0.0053709
Syllabus 23 38.434783 0.0025736
Game 19 34.263158 0.0021260
Primary Source 18 687.555556 0.0020141
Student Guide 17 31.352941 0.0019022

date added

# Resource counts, mean views, and share of the 8937 science resources by the
# year in which the resource was added.
#
# NOTE: the table printed after this chunk was produced before the
# de-duplication step below was introduced (its `n` column sums to 10098
# rather than 8937); re-run to refresh it.
t %>%
  unnest(subject) %>%
  filter(
    subject %in% c("Applied Science", "Life Science", "Physical Science")
  ) %>%
  # Unnesting `subject` repeats a resource once per science subject it carries;
  # keep one row per resource so the yearly shares are out of 8937 resources.
  distinct(unique_links, .keep_all = TRUE) %>%
  unnest(date_added) %>%
  # `date_added` is parsed as month-day-year text.
  mutate(date_added = lubridate::mdy(date_added),
         year = lubridate::year(date_added)) %>%
  group_by(year) %>%
  summarize(n = n(),
            mean_views = mean(views)) %>%
  mutate(prop = n / 8937) %>%
  arrange(desc(year)) %>%
  knitr::kable()
year n mean_views prop
2022 1026 8.4561404 0.1148036
2021 1254 20.5007974 0.1403155
2020 701 51.8473609 0.0784380
2019 476 66.1974790 0.0532617
2018 784 55.6645408 0.0877252
2017 1200 206.6675000 0.1342732
2016 462 214.5865801 0.0516952
2015 266 14.8759398 0.0297639
2014 447 17.0738255 0.0500168
2013 53 5.2075472 0.0059304
2012 316 19.5094937 0.0353586
2011 129 31.3953488 0.0144344
2010 136 21.1985294 0.0152176
2009 39 16.4615385 0.0043639
2008 90 18.0333333 0.0100705
2007 42 17.8809524 0.0046996
2006 3 12.3333333 0.0003357
2005 882 0.4376417 0.0986908
2004 85 0.2588235 0.0095110
2003 254 14.5669291 0.0284212
2002 353 0.1048159 0.0394987
2001 328 0.7286585 0.0367014
2000 418 0.0669856 0.0467718
1999 353 0.1189802 0.0394987
1994 1 3.0000000 0.0001119

license

# Resource counts, mean views, and share of the 8937 science resources by
# license, showing only licenses attached to at least 10 resources.
#
# NOTE: the table printed after this chunk was produced before the
# de-duplication step below was introduced; re-run to refresh it.
t %>%
  unnest(subject) %>%
  filter(
    subject %in% c("Applied Science", "Life Science", "Physical Science")
  ) %>%
  # Unnesting `subject` repeats a resource once per science subject it carries;
  # keep one row per resource so the license shares are out of 8937 resources.
  distinct(unique_links, .keep_all = TRUE) %>%
  unnest(license) %>%
  mutate(license = str_trim(license)) %>%
  group_by(license) %>%
  summarize(n = n(),
            mean_views = mean(views)) %>%
  arrange(desc(n)) %>%
  mutate(prop = n / 8937) %>%
  filter(n >= 10) %>%
  knitr::kable()
license n mean_views prop
Creative Commons Attribution Non-Commercial Share Alike 3807 17.7344366 0.4259819
Creative Commons Attribution Non-Commercial 1166 203.2735849 0.1304688
Creative Commons Attribution 1090 76.0770642 0.1219649
Some Rights Reserved 882 0.4931973 0.0986908
Creative Commons Attribution Non-Commercial No Derivatives 608 69.6200658 0.0680318
Educational Use Permitted 485 118.4865979 0.0542688
Creative Commons Attribution Share Alike 293 41.6689420 0.0327851
Public Domain Dedication 78 15.7820513 0.0087278
Creative Commons Attribution No Derivatives 41 33.2439024 0.0045877
http://www.jonathanbird.net/jbird.html 36 22.2500000 0.0040282
Creative Commons Attribution 4.0 International 31 11.3548387 0.0034687
http://concord.org/license 23 142.8695652 0.0025736
http://www.cellsalive.com/permissn.htm 16 37.5000000 0.0017903
https://creativecommons.org/licenses/publicdomain/ 15 5.4000000 0.0016784
https://toolkit.climate.gov/about/faq 15 1.8666667 0.0016784

endorsements

# Resource counts, mean views, and share of the 8937 science resources by
# endorsing organisation.
#
# NOTE: the table printed after this chunk was produced before the
# de-duplication step below was introduced; re-run to refresh it.
t %>%
  unnest(subject) %>%
  filter(
    subject %in% c("Applied Science", "Life Science", "Physical Science")
  ) %>%
  unnest(endorsements) %>%
  # Normalise whitespace in the endorsement labels: drop the newline-plus-
  # indentation runs, trim the ends, and collapse triple spaces.
  mutate(
    endorsements = str_replace_all(endorsements, "\n                    ", ""),
    endorsements = str_trim(endorsements),
    endorsements = str_replace_all(endorsements, "   ", " ")
  ) %>%
  # Unnesting `subject` repeats a resource once per science subject it carries.
  # The denominator (8937) counts distinct resources, so count each resource
  # at most once per endorsement.
  distinct(unique_links, endorsements, .keep_all = TRUE) %>%
  group_by(endorsements) %>%
  summarize(n = n(),
            mean_views = mean(views)) %>%
  mutate(prop = n / 8937) %>%
  arrange(desc(n)) %>%
  knitr::kable()
endorsements n mean_views prop
OERC Reviewed K-12 Collections 307 15.270358 0.0343516
OSKB 206 16.956311 0.0230502
Out Teach 133 22.368421 0.0148820
Open Illinois 113 507.185841 0.0126441
Nebraska Department of Education 68 58.970588 0.0076088
MOREnet 52 3.923077 0.0058185
OER Colorado 38 60.052632 0.0042520
AIAA 32 41.656250 0.0035806
OERC Reviewed Textbooks 24 119.250000 0.0026855
Scholarly Communication Notebook 24 3.958333 0.0026855
POWER Library 22 222.727273 0.0024617
Wyoming Department of Education 13 198.076923 0.0014546
Washington Office of Superintendent of Public Instruction 11 571.636364 0.0012308
Open Textbook Library 10 61.400000 0.0011189
University of Hawai’i OER Project 9 5440.555556 0.0010070
ESUCC 8 19.750000 0.0008952
Open Washington Project 6 150.833333 0.0006714
BranchEd 3 275.000000 0.0003357
Oregon State University 3 244.000000 0.0003357
Minnesota Catalog 2 151.500000 0.0002238
Primary Sources 2 265.500000 0.0002238
Alabama Virtual Library 1 396.000000 0.0001119
Maryland State Department of Education 1 13.000000 0.0001119

standards

# Resource counts, mean views, and share of the 8937 science resources by NGSS
# standard code, labelled with the standard's name from the `standards` lookup.
#
# NOTE: the table printed after this chunk was produced before the
# de-duplication steps below were introduced (it shows `3-5-ets1` and
# `k-2-ets1` three times each); re-run to refresh it.
t %>%
  unnest(subject) %>%
  filter(
    subject %in% c("Applied Science", "Life Science", "Physical Science")
  ) %>%
  unnest(standards) %>%
  unnest(standards) %>%
  mutate(standards = tolower(standards)) %>%
  filter(str_detect(standards, "ngss")) %>%
  # Reduce each tag to the code format used by the lookup: drop the first five
  # characters (presumably an "ngss" prefix plus separator; TODO confirm), turn
  # dots into dashes, and drop the last two characters (presumably a trailing
  # "-<digit>" item number; TODO confirm).
  mutate(standards = str_sub(standards, start = 6),
         standards = str_replace_all(standards, "\\.", "-"),
         standards = str_sub(standards, end = -3)) %>%
  # Unnesting `subject` repeats a resource once per science subject it carries,
  # and a resource can carry several tags that reduce to the same code. The
  # denominator (8937) counts distinct resources, so count each resource at
  # most once per code.
  distinct(unique_links, standards, .keep_all = TRUE) %>%
  group_by(standards) %>%
  summarize(n = n(),
            mean_views = mean(views)) %>%
  # The lookup lists some codes more than once; keep one row per code so the
  # join cannot multiply summary rows. The join key is stated explicitly.
  left_join(standards %>% distinct(standards, .keep_all = TRUE),
            by = "standards") %>%
  mutate(prop = n / 8937) %>%
  arrange(desc(n)) %>%
  knitr::kable()
standards n mean_views name prop
ms-ets1 87 71.64368 Engineering Design 0.0097348
4-ls1 54 62.31481 From Molecules to Organisms: Structures and Processes 0.0060423
3-5-ets1 46 52.82609 Engineering Design 0.0051471
3-5-ets1 46 52.82609 Engineering Design 0.0051471
3-5-ets1 46 52.82609 Engineering Design 0.0051471
ms-ls2 46 55.45652 Ecosystems: Interactions, Energy, and Dynamics 0.0051471
ms-ls1 43 107.69767 From Molecules to Organisms: Structures and Processes 0.0048115
ms-ps1 41 65.95122 Matter and its Interactions 0.0045877
3-ls4 39 79.48718 Biological Evolution: Unity and Diversity 0.0043639
3-ls3 38 74.21053 Heredity: Inheritance and Variation of Traits 0.0042520
k-2-ets1 35 49.51429 Engineering Design 0.0039163
k-2-ets1 35 49.51429 Engineering Design 0.0039163
k-2-ets1 35 49.51429 Engineering Design 0.0039163
hs-ps1 34 273.85294 Matter and its Interactions 0.0038044
hs-ets1 33 146.24242 Engineering Design 0.0036925
hs-ls4 30 38.73333 Biological Evolution: Unity and Diversity 0.0033568
ms-ess2 29 68.37931 Earth’s Systems 0.0032449
ms-ps3 29 78.31034 Energy 0.0032449
k-ess3 26 60.61538 Earth and Human Activity 0.0029093
4-ps3 24 49.58333 Energy 0.0026855
hs-ls1 24 205.70833 From Molecules to Organisms: Structures and Processes 0.0026855
1-ls1 23 71.34783 From Molecules to Organisms: Structures and Processes 0.0025736
5-ps1 23 77.13043 Matter and Its Interactions 0.0025736
1-ps4 22 49.13636 Waves and Their Applications in Technologies for Information Transfer 0.0024617
5-ls2 22 43.81818 Ecosystems: Interactions, Energy, and Dynamics 0.0024617
ms-ess3 22 50.22727 Earth and Human Activity 0.0024617
5-ess2 21 304.42857 Earth’s Systems 0.0023498
hs-ess2 21 209.38095 Earth’s Systems 0.0023498
hs-ps3 21 355.47619 Energy 0.0023498
ms-ess1 21 49.09524 Earth’s Place in the Universe 0.0023498
2-ps1 19 66.89474 Matter and Its Interactions 0.0021260
hs-ls2 19 324.26316 Ecosystems: 0.0021260
hs-ls3 19 92.63158 Heredity: Inheritance and Variation of Traits 0.0021260
2-ess2 18 88.33333 Earth’s Systems 0.0020141
5-ess3 18 140.77778 Earth and Human Activity 0.0020141
hs-ps2 18 255.00000 Motion and Stability: Forces and Interactions 0.0020141
ms-ls4 18 47.61111 Biological Evolution: Unity and Diversity 0.0020141
2-ls2 17 60.47059 Ecosystems: Interactions, Energy, and Dynamics 0.0019022
2-ls4 17 56.17647 Biological Evolution: Unity and Diversity 0.0019022
3-ls1 17 89.47059 From molecules to Organisms: Structures and Processes 0.0019022
5-ls1 16 177.56250 From Molecules to Organisms: Structures and Processes 0.0017903
k-ess2 16 70.31250 Earth’s Systems 0.0017903
hs-ess3 15 74.86667 Earth and Human Activity 0.0016784
hs-ps4 15 230.06667 Waves and their Applications in Technologies for Information Transfer 0.0016784
k-ls1 15 40.33333 From Molecules to Organisms: Structures and Processes 0.0016784
3-ls2 14 98.00000 Ecosystems: Interactions, Energy, and Dynamics 0.0015665
3-ps2 14 87.00000 Motion and Stability: Forces and Interactions 0.0015665
5-ess1 13 28.84615 Earth’s Place in the Universe 0.0014546
k-ps3 13 47.76923 Energy 0.0014546
5-ps3 12 216.50000 Energy 0.0013427
1-ess1 11 55.81818 Earth’s Place in the Universe 0.0012308
3-ess2 11 59.81818 Earth’s Systems 0.0012308
4-ess2 11 54.18182 Earth’s Systems 0.0012308
ms-ls3 11 66.00000 Heredity: Inheritance and Variation of Traits 0.0012308
ms-ps4 11 50.90909 Waves and their Applications in Technologies for Information Transfer 0.0012308
1-ls3 10 64.40000 Heredity: Inheritance and Variation of Traits 0.0011189
4-ps4 10 42.90000 Waves and Their Applications in Technologies for Information Transfer 0.0011189
hs-ess1 8 649.00000 Earth’s Place in the Universe 0.0008952
ms-ps2 8 47.25000 Motion and Stability: Forces and Interactions 0.0008952
k-ps2 7 64.71429 Motion and Stability: Forces and Interactions 0.0007833
3-ess3 5 53.40000 Earth and Human Activity 0.0005595
2-ess1 4 57.50000 Earth’s Place in the Universe 0.0004476
4-ess3 4 69.75000 Earth and Human Activity 0.0004476
4-ess1 3 21.00000 Earth’s Place in the Universe 0.0003357
5-ps2 3 36.66667 Motion and Stability: Forces and Interactions 0.0003357
# Number of (science resource row, standard tag) rows whose tag mentions
# "ngss", matched case-insensitively by lower-casing the tag first.
t %>%
  unnest(subject) %>%
  filter(
    subject %in% c("Applied Science", "Life Science", "Physical Science")
  ) %>%
  unnest(standards) %>%
  unnest(standards) %>%
  filter(str_detect(tolower(standards), "ngss")) %>%
  nrow()
## [1] 1294
2
## [1] 2