rm(list = ls())

#install.packages("readxl")
library("readxl")
library("openxlsx")
library(dplyr)
library(raster)
library(raster)
library(ggplot2)


######### INFORMATION TO MODIFY ########

folder_to_upload <-""                                        # Name of the folder to find the files
folder_to_save <-""                                           # Name of the folder to save the files/figures

progress_threshold <- 0                                    # Define what your threshold would be to include the surveys (here, we want to select survey with a progress >= 90%)

number_column_demographics <-c()                  # Indicate where your demographics questions are
number_column_questions <-c()                    # Indicate where your questions are
number_closing_questions <-c()                    # Indicate where your final questions are

N_research_question <- 57 # Number of research questions total
N_criteria <- 4 # Number of criteria
weight_criteria <- c(0.8, 0.84, 0.88, 0.80) # (Impact, Answerability, Relevancy, Potential for translation)

demographic_criteria_to_analyze<- 'Status'  # Write 'Status' to not divide by demographics. Otherwise: e.g. 'Q5bis_5' / 'Q2' / 

########################################

### IMPORT SURVEY
results_qualtrics<-read_excel(paste0(folder_to_upload,''))                 # Upload the output of Qualtrics (xlsx format)
question_list<-read_excel(paste0(folder_to_upload,''))                         # Upload the list of the research questions (xlsx format)


### SELECT SURVEY BASED ON PROGRESS
results_qualtrics%>%filter(as.numeric(Progress)>=progress_threshold)->results_qualtrics_select


### CALCULATE SCORE

### 1.CALCULATE NUMBER OF ANSWERS PER CRITERIA

# Select data by demographic criteria
results_mod<-results_qualtrics_select%>%rename(Variable=demographic_criteria_to_analyze)
#results_demo<-results_mod%>%dplyr::select(Variable)%>%group_by(Variable)%>%summarize(n())
results_demo <- results_mod %>%
  dplyr::select(Variable) %>%
  filter(!is.na(Variable)) %>%  # Removes NA values
  group_by(Variable) %>%
  summarize(n = n())


N_values_demo<-dim(results_demo)[1]

clean_filename <- function(name) {
  name <- gsub("[/:?*\"|<>]", "_", name)  # Replace invalid characters
  name <- gsub("\\s+", "_", name)  # Replace spaces and newlines with underscores
  return(name)
}


for(k in 1:N_values_demo) {                # Select demographic 
  demo_criteria<-results_demo[k,1]
  
  Column_name <- c("N (Yes)","N (Maybe)","N (No)","N (Not my area of expertise)","N (NA)","N (Yes, No, Maybe)","N (Yes, No, Maybe, Not expertise)","N total","Unweighted RPS","Weighted RPS","AEA")
  df.answers_QR_each_question <-data.frame(matrix(ncol=N_research_question*N_criteria,nrow=length(Column_name)))
  colnames(df.answers_QR_each_question) <- colnames(results_mod %>% dplyr::select(number_column_questions[1]:number_column_questions[2]))
  N_column=1
  
  for (i in 1:N_research_question){   # Select research question
    
    for (j in 1:N_criteria){          # Select criteria within the research question
      
      data.RQ<-results_mod%>%filter(Variable==demo_criteria$Variable)%>%dplyr::select(paste0('#',i,'_',j))%>%rename(Answer=paste0('#',i,'_',j))
      count.answer <- data.RQ%>%group_by(Answer)%>%summarize(N=length(Answer))
      count.tot <- data.RQ%>%summarize(N=length(Answer))
      
      # Count Yes    
      count.yes <- data.frame(matrix(0,ncol=3,nrow=1))
      
      if (any(count.answer[1:min(4, nrow(count.answer)), 1] == "Yes", na.rm = TRUE)) {
        count.yes <- count.answer %>%
          filter(Answer == "Yes") %>%
          mutate(Score_weight = 1 * N)
      } else {
        count.yes <- data.frame(matrix(0, ncol = 3, nrow = 1))
        colnames(count.yes) <- c("Answer", "N", "Score_weight")  # Adjust column names as needed
      }
      
      # Count No  
      count.no <- data.frame(matrix(0,ncol=3,nrow=1))
      
      if (any(count.answer[1:min(4, nrow(count.answer)), 1] == "No", na.rm = TRUE)) {
        count.no <- count.answer %>%
          filter(Answer == "No") %>%
          mutate(Score_weight = 0 * N)
      } else {
        count.no <- data.frame(matrix(0, ncol = 3, nrow = 1))
        colnames(count.no) <- c("Answer", "N", "Score_weight")  # Adjust column names as needed
      }
      
      # Count Maybe (first position)
      count.maybe <- data.frame(matrix(0,ncol=3,nrow=1))
      
      if (any(count.answer[1:min(4, nrow(count.answer)), 1] == "Maybe", na.rm = TRUE)) {
        count.maybe <- count.answer %>%
          filter(Answer == "Maybe") %>%
          mutate(Score_weight = 0.5 * N)
      } else {
        count.maybe <- data.frame(matrix(0, ncol = 3, nrow = 1))
        colnames(count.maybe) <- c("Answer", "N", "Score_weight")  # Adjust column names as needed
      }
      
      # Count Not my area of expertise 
      count.no.exp <- data.frame(matrix(0,ncol=3,nrow=1))
      
      if (any(count.answer[1:min(4, nrow(count.answer)), 1] == "Not my area of expertise", na.rm = TRUE)) {
        count.no.exp <- count.answer %>%
          filter(Answer == "Not my area of expertise") %>%
          mutate(Score_weight = 0 * N)
      } else {
        count.no.exp <- data.frame(matrix(0, ncol = 3, nrow = 1))
        colnames(count.no.exp) <- c("Answer", "N", "Score_weight")  # Adjust column names as needed
      }
      
      # Count NA      
      count.na <- count.tot-count.no[2]-count.maybe[2]-count.yes[2]-count.no.exp[2]
      
      count.yes.no.maybe <- (count.no[2]+count.maybe[2]+count.yes[2])
      count.yes.no.maybe.exp <- (count.no[2]+count.maybe[2]+count.yes[2]+count.no.exp[2])
      
      
      # Calculate MODE
      mode<-modal(data.RQ$Answer,na.rm = TRUE)
      num <- count.answer %>% filter(count.answer$Answer==mode) %>% pull(N)
      
      
      # Compute weighted and unweighted ratios
      ratio_per_criteria_for_RPS_unweight <- (count.maybe[3]+count.yes[3])/(count.yes.no.maybe)
      ratio_per_criteria_for_RPS_weight <- (count.maybe[3]+count.yes[3])/(count.yes.no.maybe)*weight_criteria[j]
      
      # Compute AEA
      if(any(is.nan(pull(ratio_per_criteria_for_RPS_weight)))){ratio_per_criteria_for_AEA<-NA
      } else {
        ratio_per_criteria_for_AEA <-num/count.yes.no.maybe.exp
      }
      
      # Combine all values 
      all.counts <- c(
        as.numeric(count.yes[2]), 
        as.numeric(count.maybe[2]), 
        as.numeric(count.no[2]), 
        as.numeric(count.no.exp[2]), 
        as.numeric(count.na), 
        as.numeric(count.yes.no.maybe), 
        as.numeric(count.yes.no.maybe.exp), 
        as.numeric(count.tot), 
        as.numeric(ratio_per_criteria_for_RPS_unweight), 
        as.numeric(ratio_per_criteria_for_RPS_weight), 
        as.numeric(ratio_per_criteria_for_AEA)
      )
      
      df.answers_QR_each_question[,N_column]<-all.counts
      N_column<-N_column+1
    }
  }
  
  all.count.criteria<-cbind(Column_name,df.answers_QR_each_question)
  
  safe_filename <- clean_filename(paste0('count_criteria_RQdate_', demographic_criteria_to_analyze, '_', demo_criteria, '.xlsx'))
  write.xlsx(all.count.criteria, paste0(folder_to_save, safe_filename))
  
}