Here we provide example code in R and Python for loading multiple datasets in IRW and performing some summary computations over them. This should be a useful starting point for conducting your own analyses of the data. You may also be interested in the functionality embedded in the irw
package .
Code
library (dplyr)
library (purrr)
dataset_names <- c ("4thgrade_math_sirt" , "chess_lnirt" , "dd_rotation" )
compute_metadata <- function (df) {
df <- df |> filter (! is.na (resp)) |> mutate (resp = as.numeric (resp))
tibble (
n_responses = nrow (df),
n_categories = n_distinct (df$ resp),
n_participants = n_distinct (df$ id),
n_items = n_distinct (df$ item),
responses_per_participant = n_responses / n_participants,
responses_per_item = n_responses / n_items,
sparsity = (sqrt (n_responses) / n_participants) * (sqrt (n_responses) / n_items)
)
}
dataset <- redivis:: user ("datapages" )$ dataset ("item_response_warehouse" )
get_data_summary <- function (dataset_name) {
# fetch data
df <- dataset$ table (dataset_name)$ to_tibble ()
# compute summary
compute_metadata (df) |> mutate (dataset_name = dataset_name, .before = everything ())
}
summaries_list <- map (dataset_names, get_data_summary)
summaries <- bind_rows (summaries_list)
summaries
4thgrade_math_sirt
19920
2
664
30
30.000000
664.0
1.0000000
chess_lnirt
10240
2
256
40
40.000000
256.0
1.0000000
dd_rotation
1178
2
121
10
9.735537
117.8
0.9735537
Code
import pandas as pd
from math import sqrt
import redivis
dataset_names = ["4thgrade_math_sirt" , "chess_lnirt" , "dd_rotation" ]
def compute_metadata(df):
df = (df
.loc[~ df['resp' ].isna()]
.assign(resp= pd.to_numeric(df['resp' ]))
)
return pd.DataFrame({
'n_responses' : [len (df)],
'n_categories' : [df['resp' ].nunique()],
'n_participants' : [df['id' ].nunique()],
'n_items' : [df['item' ].nunique()],
'responses_per_participant' : [len (df) / df['id' ].nunique()],
'responses_per_item' : [len (df) / df['item' ].nunique()],
'sparsity' : [(sqrt(len (df)) / df['id' ].nunique()) * (sqrt(len (df)) / df['item' ].nunique())]
})
dataset = redivis.user('datapages' ).dataset('item_response_warehouse' )
def get_data_summary(dataset_name):
df = pd.DataFrame(dataset.table(dataset_name).to_pandas_dataframe())
summary = compute_metadata(df)
summary.insert(0 , 'dataset_name' , dataset_name)
return summary
summaries_list = [get_data_summary(name) for name in dataset_names]
summaries = pd.concat(summaries_list, ignore_index= True )
print (summaries)
Here is a slightly more complex example showing how to compute the InterModel Vigorish contrasting predictings for the 2PL to predictions from the 1PL for an example dataset (using cross-validation across 4 folds).
Code
library (mirt)
library (irw)
dataset <- redivis:: user ("datapages" )$ dataset ("item_response_warehouse" )
df <- dataset$ table ("gilbert_meta_2" )$ to_data_frame ()
items<- unique (df$ item)
if (all (items %in% 1 : length (items))) {
df$ item<- paste ("item_" ,df$ item,sep= '' )
items<- unique (df$ item)
}
resp<- irw:: long2resp (df)
id<- resp$ id
resp$ id<- NULL
##cross-validation for models estimated in mirt
set.seed (8675309 )
ntimes<- 4
df$ gr<- sample (1 : ntimes,nrow (df),replace= TRUE )
x.hold<- df
omega<- numeric ()
for (i in 1 : ntimes) {
x<- x.hold
x$ oos<- ifelse (x$ gr== i,1 ,0 )
x0<- x[x$ oos== 0 ,]
resp0<- data.frame (irw:: long2resp (x0))
id<- resp0$ id
resp0$ id<- NULL
##rasch model
m0<- mirt (resp0,1 ,'Rasch' ,verbose= FALSE )
##2pl
ni<- ncol (resp0)
s<- paste ("F=1-" ,ni,"
PRIOR = (1-" ,ni,", a1, lnorm, 0.0, 1.0)" ,sep= "" )
model<- mirt.model (s)
m1<- mirt (resp0,model,itemtype= rep ("2PL" ,ni),method= "EM" ,technical= list (NCYCLES= 10000 ),verbose= FALSE )
##
z0<- getp (m0,x= x[x$ oos== 1 ,],id= id)
z1<- getp (m1,x= x[x$ oos== 1 ,],id= id)
z0<- z0[,c ("item" ,"id" ,"resp" ,"p" )]
names (z0)[4 ]<- 'p1'
z1<- z1[,c ("item" ,"id" ,"p" )]
names (z1)[3 ]<- 'p2'
z<- merge (z0,z1)
omega[i]<- imv (z,p1= "p1" ,p2= "p2" )
}
mean (omega)