# HWF Randomised Pricing Experiment -------- # README --------------------------------------------------------------------------------------------------------------- # This R script contains the code used to run the following: # Unadjusted & adjusted Poisson regression models for Phase 2 of the analysis (Table D of the supporting information). # Estimation of 50% purchase probability using the regression model (Fig B of the supporting information). # 1. System requirements -- # Operating system: Windows 10 or later, macOS Monterey or later, Linux # Software: R version 4.0 or later (tested on R 4.5.0) # Required R packages: # readr (load csv file) # fixest (regression model) # dplyr (data manipulation) # sandwich (robust SEs) # broom (tidy regression output) # scales (p-values) # ggplot2 (making graphs) # 2. Installation guide # Install R from https://cran.r-project.org/ # Install RStudio (recommended) from https://posit.co/download/rstudio-desktop/ # Install required packages by running: # install.packages(c("readr", "fixest", "dplyr", "sandwich", "broom", "scales", "ggplot2")) # Typical install time is less than 10 minutes # 3. Instructions for use # Request the "Phase1-2_data.zip" file. # Place "Phase2_data.csv" with this R script in the same folder. # Set working directory (in R: setwd("path/to/your/folder") or via Session > Set Working Directory in RStudio) # Run the analysis script # LOAD PACKAGES + DATA --------------------------------------------------------------------------------------------------------------- install.packages(c("readr", "fixest", "dplyr", "sandwich", "broom", "scales", "ggplot2")) library(readr) library(fixest) library(dplyr) library(sandwich) library(broom) library(scales) library(ggplot2) Phase2_data <- read_csv("Phase2_data.csv") # POISSON REGRESSION - PURCHASING + EFFECTIVE PRICE--------------------------------------------------------------------------------------------------------------- # Data Cleaning -------------------------------------------------------------- Phase2_data <- Phase2_data |> mutate(outcome_bin = case_when( outcome == "Yes" ~ 1, outcome == "No" ~ 0), gender = factor(gender, levels = c("Male", "Female")), community = factor(community, levels = c("George", "Matero"))) # Unadjusted -------------------------------------------------------------- unadjusted_model <- feglm(outcome_bin ~ as.numeric(effective_price), family = poisson(link = "log"), data = Phase2_data, vcov = sandwich) unadjusted_model_formatted <- tidy(unadjusted_model, conf.int = T, conf.level = 0.95, vcov = vcovHC(unadjusted_model, type = "HC0")) |> mutate( RR = exp(estimate), lower_CI = exp(conf.low), upper_CI = exp(conf.high) ) %>% select(term, RR, lower_CI, upper_CI, p.value) |> mutate( RR_CI = sprintf("%.2f (%.2f - %.2f)", RR, lower_CI, upper_CI), p.value = pvalue(p.value) ) %>% select(term, RR_CI, p.value) unadjusted_model_formatted # Adjusted -------------------------------------------------------------- adjusted_model <- feglm(outcome_bin ~ effective_price + age + gender + wealth + community, family = poisson(link = "log"), data = Phase2_data, vcov = sandwich) adjusted_model_formatted <- tidy(adjusted_model, conf.int = T, conf.level = 0.95, vcov = vcovHC(adjusted_model, type = "HC0")) |> mutate( RR = exp(estimate), lower_CI = exp(conf.low), upper_CI = exp(conf.high) ) %>% select(term, RR, lower_CI, upper_CI, p.value) |> mutate( RR_CI = sprintf("%.4f (%.4f - %.4f)", RR, lower_CI, upper_CI), p.value = pvalue(p.value) ) %>% select(term, RR_CI, p.value) adjusted_model_formatted # FIG B - SUPPORTING INFORMATION ------------------------------------------ # Step 1: Generate effective price sequence price_range <- data.frame( effective_price = seq(min(Phase2_data$effective_price, na.rm = TRUE), max(Phase2_data$effective_price, na.rm = TRUE), length.out = 100)) # Step 2: Add other predictors with median/reference values price_range$age <- median(Phase2_data$age, na.rm = TRUE) price_range$wealth <- median(Phase2_data$wealth, na.rm = TRUE) price_range$gender <- factor("Male", levels = c("Male", "Female")) price_range$community <- factor("George", levels = c("George", "Matero")) # Step 3: Predict using the model price_range$predicted_prob <- predict( adjusted_model, newdata = price_range, type = "response" ) # Step 4: Extract coefficients coef_list <- coef(adjusted_model) intercept <- coef_list["(Intercept)"] slope <- coef_list["effective_price"] beta_age <- coef_list["age"] beta_wealth <- coef_list["wealth"] median_wealth <- median(Phase2_data$wealth, na.rm = TRUE) median_age <- median(Phase2_data$age, na.rm = TRUE) beta_gender <- coef_list["genderFemale"] if (is.na(beta_gender)) beta_gender <- 0 beta_location <- coef_list["communityMatero"] if (is.na(beta_location)) beta_location <- 0 # Step 5: Find effective price where predicted probability is closest to 0.5 closest_index <- which.min(abs(price_range$predicted_prob - 0.5)) target_price <- price_range$effective_price[closest_index] # Step 6: Plot regression_plot <- ggplot(price_range, aes(x = effective_price, y = predicted_prob)) + geom_line(color = "blue", linewidth = 1.2) + geom_hline(yintercept = 0.5, linetype = "dashed", color = "grey40") + geom_vline(xintercept = target_price, linetype = "dashed", color = "red") + annotate("text", x = target_price, y = 0.57, label = paste0("Effective price at 50% \npurchase probability: ", round(target_price, 0), " ZMW"), hjust = -0.03, color = "red", lineheight = 1, size = 5) + scale_y_continuous(limits = c(0,1.1), breaks = seq(0,1.1,0.2), labels = scales::number_format(accuracy = 0.1)) + labs( x = "Effective Price of HWF (ZMW)", y = "Predicted Probability of Purchase (Poisson)" ) + theme_minimal(base_size = 16)+ theme( axis.line = element_line(color = "black", linewidth = 0.8), axis.ticks = element_line(color = "black"), axis.text = element_text(color = "black"), axis.title = element_text(color = "black"), panel.background = element_rect(fill = "white", color = NA), plot.background = element_rect(fill = "white", color = NA) ) regression_plot