Numberbank

Plot = import("https://esm.sh/@observablehq/plot@0.6.13")
import {interval} from '@mootari/range-slider' // two ended slider

// Typography passed to the `style` option of every chart below.
fontFamily = "Source Sans Pro"
fontSize = ".7rem"

// Label shared by the age axes.
age_label = "Age (months)"

// Markup rendered in place of the charts when the active filters match no rows.
filter_error = `<p class="filter-error"><i class="bi bi-exclamation-triangle"></i> No data found for selected filters</p>`

// Row-wise copies of the inputs: kl_data is read column-by-column elsewhere
// (e.g. kl_data.age, kl_data.kl), whereas kl and tr are filtered one row object
// at a time.
// NOTE(review): transpose, kl_data and trial_data are defined outside this view —
// presumably Quarto's OJS helper and ojs_define'd data frames; confirm.
kl = transpose(kl_data)
tr = transpose(trial_data)

Facet by…

// facets
viewof fs = Inputs.checkbox(["language", "country", "method"],
                            {value: ["language"]})

fx = fs.includes("method") ? "method" : null // fx = "method" or null
fy = fs.filter(x => x !== "method").join("_")

Filter by…

viewof ages = interval([d3.min(kl_data.age), d3.max(kl_data.age)],
                       {step: 1, label: "Age (months)"})

// kl filter
// Comparator for knower-level labels: alphabetical (localeCompare) except that
// "CP-knower" always sorts last. Equal labels compare as 0 so the function is a
// consistent comparator for Array.prototype.sort even if a label is repeated.
cp_sort = (a, b) => {
  if (a === b) return 0;
  if (a === "CP-knower") return 1;
  if (b === "CP-knower") return -1;
  return a.localeCompare(b);
};
kl_vals = Array.from(new Set(kl_data.kl)).sort(cp_sort) // options
kl_defs = kl_vals.filter(s => /^[123C]/.test(s)) // default
viewof kls = Inputs.select(kl_vals, {label: "Knower levels", multiple: true, value: kl_defs})

// kl subset toggle
viewof kl_sub = Inputs.checkbox(["Group non-CP-knowers together"])
kl_y = kl_sub.length ? "kl_subset" : "kl"

// language filter
lang_vals = Array.from(new Set(kl_data.language_countries)).sort() // options
lang_defs = lang_vals.filter(s => /English/.test(s))
viewof langs = Inputs.select(lang_vals, {label: "Languages", multiple: true, value: lang_defs})

// country filter
country_vals = Array.from(new Set(kl_data.country_languages)).sort() // options
country_defs = country_vals.filter(s => /English/.test(s))
viewof countries = Inputs.select(country_vals, {label: "Countries", multiple: true, value: country_defs})

// dataset filter
dataset_vals = Array.from(new Set(kl_data.dataset_id)).sort() // options
dataset_defs = dataset_vals // default
viewof datasets = Inputs.select(dataset_vals, {label: "Datasets", multiple: true, value: dataset_defs})

// Knower-level rows that satisfy every active filter: language, country,
// dataset, knower level and the inclusive age range from the slider.
klf = kl.filter(row =>
  langs.includes(row.language_countries) &&
  countries.includes(row.country_languages) &&
  datasets.includes(row.dataset_id) &&
  kls.includes(row.kl) &&
  row.age >= ages[0] &&
  row.age <= ages[1]
)

// Distinct knower-level values (from the kl_y column) that survive the filters,
// ordered by cp_sort; used as the domain of the charts' color and y scales.
klfv = [...new Set(klf.map(row => row[kl_y]))].sort(cp_sort)

// True when the filters leave at least one knower-level row.
kl_found = klf.length !== 0

Give-N
Highest count

Note that some filter combinations may return no data. To ensure output, we recommend selecting all countries and languages and then removing them one by one.

// Show the "no data" message when the knower-level filters match nothing.
html`${!kl_found ? filter_error : ''}`

// Left margin used by the dataset-count bar charts (and this chart's legend).
ss_margin = 180

// Stacked horizontal bars: number of filtered rows per dataset, coloured by
// knower level, with the count printed in white on each segment. Renders
// nothing when the filters match no rows.
!kl_found ? html`` : Plot.plot({
  title: "Counts of knower level in each dataset",
  style: { fontFamily, fontSize },
  marginLeft: ss_margin,
  color: { legend: true, domain: klfv, marginLeft: ss_margin },
  x: { label: "", labelAnchor: "center", labelArrow: "none", axis: "top" },
  y: { label: "Dataset", labelAnchor: "top", tickSize: 0, tickPadding: 2 },
  marks: [
    // One bar per dataset, ordered by descending count.
    Plot.barX(
      klf,
      Plot.groupY(
        { x: "count" },
        {
          fill: kl_y,
          y: "dataset_id",
          inset: 1,
          sort: { y: "x", reverse: true }
        }
      )
    ),
    // Count labels, stacked the same way as the bar segments.
    Plot.textX(
      klf,
      Plot.stackX(
        Plot.groupY(
          { x: "count", text: "count" },
          { y: "dataset_id", z: kl_y, fill: "white" }
        )
      )
    )
  ]
})

// Box plot of age for each knower level, optionally faceted by fx / fy.
// Renders nothing when the filters match no rows.
!kl_found ? html`` : Plot.plot({
  title: "Distribution of knower level over age",
  style: { fontFamily, fontSize },
  marginLeft: 60,
  marginRight: 100,
  facet: { label: null },
  color: { domain: klfv },
  x: {
    label: age_label,
    grid: true,
    inset: 10,
    line: true,
    labelAnchor: "center",
    labelArrow: "none"
  },
  y: {
    label: "Knower level",
    labelAnchor: "top",
    tickSize: 0,
    tickPadding: 2,
    domain: klfv
  },
  marks: [
    Plot.boxX(klf, { x: "age", y: kl_y, fill: kl_y, fx, fy })
  ]
})

// Cumulative probability of each knower level over age, one line per level,
// optionally faceted by fx / fy. Renders nothing when the filters match no rows.
!kl_found ? html`` : Plot.plot({
  title: "Cumulative probability of knower level over age",
  style: { fontFamily, fontSize },
  marginRight: 100,
  facet: { label: null },
  color: { legend: true, domain: klfv },
  x: {
    label: age_label,
    grid: true,
    inset: 10,
    line: true,
    labelAnchor: "center",
    labelArrow: "none"
  },
  y: {
    label: "Cumulative probability of knower level",
    inset: 5,
    line: true,
    labelAnchor: "center",
    labelArrow: "none"
  },
  marks: [
    Plot.lineY(
      klf,
      // Applied inside-out: count rows per age bin, take the running total,
      // then rescale each series to 0-1.
      Plot.normalizeY(
        "extent",
        Plot.mapY(
          "cumsum",
          Plot.binY({ y: "count" }, { x: "age", stroke: kl_y, fx, fy })
        )
      )
    )
  ]
})

Note that some filter combinations may return no data. To ensure output, we recommend selecting all countries and languages and then removing them one by one.

// Trial rows that satisfy every active filter: language, country, dataset,
// knower level and the inclusive age range from the slider.
trf = tr.filter(row =>
  langs.includes(row.language_countries) &&
  countries.includes(row.country_languages) &&
  datasets.includes(row.dataset_id) &&
  kls.includes(row.kl) &&
  row.age >= ages[0] &&
  row.age <= ages[1]
)

// True when the filters leave at least one trial row.
tr_found = trf.length !== 0

// Show the "no data" message when the trial filters match nothing.
html`${!tr_found ? filter_error : ''}`

// Nested counts of filtered trials: knower level (kl_y column) -> query -> response -> count.
rc_map = d3.rollup(trf, v => v.length, d => d[kl_y], d => d.query, d => d.response)

// Flatten rc_map into one row per (knower level, query, response) carrying the
// count, the total number of responses for that level/query pair, and the
// resulting proportion. The total is computed once per query rather than once
// per response row.
rc = Array.from(rc_map, ([kl, kl_responses]) =>
  Array.from(kl_responses, ([query, query_responses]) => {
    const total = d3.sum(query_responses.values())
    return Array.from(query_responses, ([response, count]) =>
      ({ kl, query, response, count, total, proportion: count / total })
    )
  }).flat()
).flat()

// Proportion of each response, in a grid of query (columns) by knower level
// (rows). Renders nothing when the filters match no trial rows.
// NOTE(review): the fy / color domains use klfv, which is derived from klf,
// while the plotted rows come from trf — confirm both always hold the same levels.
!tr_found ? html`` : Plot.plot({
  title: "Distribution of responses by knower level",
  style: { fontFamily, fontSize },
  width: 950,
  height: 120 * rc_map.size, // scales with the number of knower levels in rc_map
  marginRight: 100,
  color: { domain: klfv },
  fx: { label: "Query" },
  fy: { label: null, padding: 0.2, domain: klfv },
  x: { label: "Response" },
  y: { label: "Proportion responses", grid: true, domain: [0, 1] },
  marks: [
    Plot.rectY(rc, {
      x: "response",
      y: "proportion",
      fx: "query",
      fy: "kl",
      fill: "kl",
      padding: 0.03
    }),
    Plot.axisX({ facetAnchor: null }),
    Plot.axisY({
      facetAnchor: "left",
      labelAnchor: "center",
      labelArrow: "none"
    }),
    Plot.ruleY([0])
  ]
})

Plots
Data

Note that some filter combinations may return no data. To ensure output, we recommend selecting all countries and languages and then removing them one by one.

// Filtered knower-level rows that have a truthy highest count (hc).
// NOTE(review): the truthiness test also drops rows where hc === 0 — confirm
// that 0 is never a valid highest count.
hcf = klf.filter(row => Boolean(row.hc))

// True when at least one row has a highest count.
hc_found = hcf.length !== 0

// Show the "no data" message when no highest-count rows remain.
html`${!hc_found ? filter_error : ''}`

// Sample-size chart: number of highest-count rows per dataset, ordered by
// descending count, with the count printed in white on each bar. Renders
// nothing when no highest-count rows remain.
!hc_found ? html`` : Plot.plot({
  title: "Counts in each dataset",
  style: { fontFamily, fontSize },
  marginLeft: ss_margin,
  x: { label: "", labelAnchor: "center", labelArrow: "none", axis: "top" },
  y: { label: "Dataset", labelAnchor: "top", tickSize: 0, tickPadding: 2 },
  marks: [
    Plot.barX(
      hcf,
      Plot.groupY(
        { x: "count" },
        { y: "dataset_id", inset: 1, sort: { y: "x", reverse: true } }
      )
    ),
    Plot.textX(
      hcf,
      Plot.stackX(
        Plot.groupY(
          { x: "count", text: "count" },
          { y: "dataset_id", fill: "white" }
        )
      )
    )
  ]
})

// Scatter of highest count against age, optionally faceted by fx / fy.
// Renders nothing when no highest-count rows remain.
!hc_found ? html`` : Plot.plot({
  title: "Highest count over age",
  style: { fontFamily, fontSize },
  marginRight: 100,
  facet: { label: null },
  x: {
    label: age_label,
    grid: true,
    inset: 10,
    line: true,
    labelAnchor: "center",
    labelArrow: "none"
  },
  y: {
    label: "Highest count",
    inset: 5,
    line: true,
    labelAnchor: "center",
    labelArrow: "none"
  },
  marks: [
    Plot.dot(hcf, { x: "age", y: "hc", fill: "black", fx, fy })
  ]
})

// Histogram of highest count (row count per hc bin), optionally faceted by
// fx / fy. Renders nothing when no highest-count rows remain.
!hc_found ? html`` : Plot.plot({
  title: "Distribution of highest count",
  style: { fontFamily, fontSize },
  marginRight: 100,
  facet: { label: null },
  x: {
    label: "Highest count",
    grid: true,
    inset: 10,
    line: true,
    labelAnchor: "center",
    labelArrow: "none"
  },
  y: { line: true, labelAnchor: "center", labelArrow: "none" },
  marks: [
    Plot.rectY(hcf, Plot.binX({ y: "count" }, { x: "hc", fx, fy }))
  ]
})

// Box plot of highest count for each knower level, optionally faceted by
// fx / fy. Renders nothing when no highest-count rows remain.
!hc_found ? html`` : Plot.plot({
  title: "Distribution of highest count by knower level",
  style: { fontFamily, fontSize },
  marginLeft: 60,
  marginRight: 100,
  facet: { label: null },
  color: { domain: klfv },
  x: {
    label: "Highest count",
    grid: true,
    inset: 10,
    line: true,
    labelAnchor: "center",
    labelArrow: "none"
  },
  y: {
    label: "Knower level",
    labelAnchor: "top",
    tickSize: 0,
    tickPadding: 2,
    domain: klfv
  },
  marks: [
    Plot.boxX(hcf, { x: "hc", y: kl_y, fill: kl_y, fx, fy })
  ]
})