Data analysis

Here we provide example code in R and Python for loading multiple datasets in IRW and performing some summary computations over them. This should be a useful starting point for conducting your own analyses of the data. You may also be interested in the functionality embedded in the irw package.

Code
library(dplyr)
library(purrr)

dataset_names <- c("4thgrade_math_sirt", "chess_lnirt", "dd_rotation")

compute_metadata <- function(df) {
  df <- df |> filter(!is.na(resp)) |> mutate(resp = as.numeric(resp))
  tibble(
    n_responses = nrow(df),
    n_categories = n_distinct(df$resp),
    n_participants = n_distinct(df$id),
    n_items = n_distinct(df$item),
    responses_per_participant = n_responses / n_participants,
    responses_per_item = n_responses / n_items,
    sparsity = (sqrt(n_responses) / n_participants) * (sqrt(n_responses) / n_items)
  )
}

dataset <- redivis::user("datapages")$dataset("item_response_warehouse")
get_data_summary <- function(dataset_name) {
  # fetch data
  df <- dataset$table(dataset_name)$to_tibble()
  # compute summary
  compute_metadata(df) |> mutate(dataset_name = dataset_name, .before = everything())
}

summaries_list <- map(dataset_names, get_data_summary)
summaries <- bind_rows(summaries_list)
summaries
dataset_name n_responses n_categories n_participants n_items responses_per_participant responses_per_item sparsity
4thgrade_math_sirt 19920 2 664 30 30.000000 664.0 1.0000000
chess_lnirt 10240 2 256 40 40.000000 256.0 1.0000000
dd_rotation 1178 2 121 10 9.735537 117.8 0.9735537
Code
import pandas as pd
from math import sqrt
import redivis

dataset_names = ["4thgrade_math_sirt", "chess_lnirt", "dd_rotation"]

def compute_metadata(df):
    df = (df
          .loc[~df['resp'].isna()]
          .assign(resp=pd.to_numeric(df['resp']))
         )
    
    return pd.DataFrame({
        'n_responses': [len(df)],
        'n_categories': [df['resp'].nunique()],
        'n_participants': [df['id'].nunique()],
        'n_items': [df['item'].nunique()],
        'responses_per_participant': [len(df) / df['id'].nunique()],
        'responses_per_item': [len(df) / df['item'].nunique()],
        'sparsity': [(sqrt(len(df)) / df['id'].nunique()) * (sqrt(len(df)) / df['item'].nunique())]
    })

dataset = redivis.user('datapages').dataset('item_response_warehouse')
def get_data_summary(dataset_name):
  df = pd.DataFrame(dataset.table(dataset_name).to_pandas_dataframe())
    
  summary = compute_metadata(df)
  summary.insert(0, 'dataset_name', dataset_name)
  return summary

summaries_list = [get_data_summary(name) for name in dataset_names]
summaries = pd.concat(summaries_list, ignore_index=True)
print(summaries)

Here is a slightly more complex example showing how to compute the InterModel Vigorish contrasting predictings for the 2PL to predictions from the 1PL for an example dataset (using cross-validation across 4 folds).

Code
library(mirt)
library(irw) 

dataset <- redivis::user("datapages")$dataset("item_response_warehouse")
df <- dataset$table("gilbert_meta_2")$to_data_frame()
items<-unique(df$item)
if (all(items %in% 1:length(items))) {
    df$item<-paste("item_",df$item,sep='')
    items<-unique(df$item)
}
resp<-irw::long2resp(df)
id<-resp$id
resp$id<-NULL

##cross-validation for models estimated in mirt
set.seed(8675309)
ntimes<-4
df$gr<-sample(1:ntimes,nrow(df),replace=TRUE)
x.hold<-df
omega<-numeric()
for (i in 1:ntimes) {
    x<-x.hold
    x$oos<-ifelse(x$gr==i,1,0)
    x0<-x[x$oos==0,]
    resp0<-data.frame(irw::long2resp(x0))
    id<-resp0$id
    resp0$id<-NULL
    ##rasch model
    m0<-mirt(resp0,1,'Rasch',verbose=FALSE)
    ##2pl
    ni<-ncol(resp0)
    s<-paste("F=1-",ni,"
             PRIOR = (1-",ni,", a1, lnorm, 0.0, 1.0)",sep="")
    model<-mirt.model(s)
    m1<-mirt(resp0,model,itemtype=rep("2PL",ni),method="EM",technical=list(NCYCLES=10000),verbose=FALSE)
    ##
    z0<-getp(m0,x=x[x$oos==1,],id=id)
    z1<-getp(m1,x=x[x$oos==1,],id=id)
    z0<-z0[,c("item","id","resp","p")]
    names(z0)[4]<-'p1'
    z1<-z1[,c("item","id","p")]
    names(z1)[3]<-'p2'
    z<-merge(z0,z1)
    omega[i]<-imv(z,p1="p1",p2="p2")
}
mean(omega)
[1] 0.01274407