join_propensity_scores.Rd

Helper function to perform a secure join of a comparison matrix with propensity scores.

join_propensity_scores(input_data, label_weights)
join_propensity_scores_dplyr(input_data, label_weights)

The input data.frame input_data with an additional column
"label_weight".

join_propensity_scores_dplyr(): Variant with dplyr-based
internals rather than collapse-based internals.

library(casimir)

# Example for join_propensity_scores(): build a comparison from two
# doc_id/label_id tables, compute per-label weights from a label
# distribution, and attach those weights to the comparison.

# First input to create_comparison(): one row per (doc_id, label_id) pair.
gold <- tibble::tibble(
  doc_id = rep(c("A", "B", "C"), times = c(3, 2, 4)),
  label_id = c("a", "b", "c", "a", "d", "a", "b", "d", "f")
)

# Second input to create_comparison(), same column layout as `gold`.
pred <- tibble::tibble(
  doc_id = rep(c("A", "B", "C"), times = c(3, 2, 1)),
  label_id = c("a", "d", "f", "a", "e", "f")
)

# Input to compute_propensity_scores(): one row per label with its
# frequency; n_docs is the same value on every row (length-1 recycling).
label_distribution <- tibble::tibble(
  label_id = c("a", "b", "c", "d", "e", "f", "g"),
  label_freq = c(10000, 1000, 100, 1, 1, 2, 0),
  n_docs = 10100
)

comp <- create_comparison(gold, pred)
label_weights <- compute_propensity_scores(label_distribution)

# The result is `comp` with an additional "label_weight" column.
comp_w_label_weights <- join_propensity_scores(
  input_data = comp,
  label_weights = label_weights
)