# Map from 5L to 3L. Need to have "Table5.csv" and "Table3.csv" in working directory
library("here") #sets working directory - make sure you load R by double-clicking this file
library("tidyverse")

df_3 = read.csv(here("Table3v5.csv")) %>% mutate(Domain = str_c(X_Y3_1, X_Y3_2, X_Y3_3, X_Y3_4, X_Y3_5),
                                                 X_age = as.numeric(as.factor(X_age5grp)), X_U5 = round(X_U3UK,10)) %>%
  rename(Output = X_EUUKcopula)   # For mapping from 3L to 5L
df_5 = read.csv(here("Table5v5.csv")) %>% mutate(Domain = str_c(X_Y5_1, X_Y5_2, X_Y5_3, X_Y5_4, X_Y5_5),
                                                 X_age = as.numeric(as.factor(X_age5grp)), X_U5 = round(X_U5UK,10)) %>%
  rename(Output = X_EUUKcopula) # For mapping from 5L to 3L

fun_Epan = function(target, values, bwidth){ # Generate Epan weight
  distance = values - target # Calculate distance from input
  Epan = case_when(abs(distance) >= bwidth ~ 0,     # If outside bandwidth weight = 0
                   TRUE ~ 1 - (distance/bwidth)^2)
  return(Epan)
}

fun_map = function(var_age, var_male, input, df, summary = 1, bwidth = 0, output = "3L"){
  # var_age = Age, var_male = Male, summary = 0/1 for if summary scores are provided
  # Input: either individual domains, or summary score. E.g. 'input = c(1, 3, 5, 1, 5)' or 'input = 0.75'
  # bwidth = bandwidth (caliper) - either zero for exact matches, or > 0.
  
  if(output == "3L") my_df = df_5 else my_df = df_3   # Check which output df to use
  
  # Ensure character input and change age and male to match column names of my_df
  var_age = as.character(var_age)
  var_male = as.character(var_male)
  input = as.character(input)
  df = df %>% rename("X_male" = all_of(var_male), "tmp_age" = all_of(var_age)) %>%
    # New code - map age to age-band. Input will either be individual age [16-100] or age-band integer [1-5]
    mutate(X_age = case_when(tmp_age >= 16 & tmp_age < 35 ~ 1,
                             tmp_age >= 35 & tmp_age < 45 ~ 2,
                             tmp_age >= 45 & tmp_age < 55 ~ 3,
                             tmp_age >= 55 & tmp_age < 65 ~ 4,
                             tmp_age >= 65 & tmp_age <= 100 ~ 5,
                             tmp_age >= 1 & tmp_age <= 5 ~ as.double(tmp_age),
                             TRUE ~ 9999))
  
  if(max(df$X_age) == 9999) stop('Supplied age must be either between 16 and 100 or an age band (1 to 5)')
  
  if (summary == 0){ # Individual domains provided
    df[input] = sapply(df[input], as.character)   # Convert domains to character
    df = df %>% mutate(Domain = pmap_chr(df[input], str_c, collapse = "")) %>% # Collapse domain to single var & join 
      left_join(select(my_df, c(Domain, Output, X_age, X_male)), by = c("Domain", "X_age", "X_male"))

  } else if (summary == 1){ # Summary score provided
    df = df %>% rename("X_U5" = all_of(input)) %>% mutate(ID = 1:n()) # Change outcomes to match, add unique ID
    if (bwidth == 0){ # Exact score provided, lookup on this
      df = left_join(df, select(my_df, c(X_U5, Output, X_age, X_male)), by = c("X_U5", "X_age", "X_male")) %>%
        group_by(ID) %>% summarise(X_age = mean(X_age), X_male = mean(X_male), X_U5 = mean(X_U5), Output = mean(Output))
      #if (is.na(df$Output)) df$Output = "No exact matches found, try setting bandwidth > 0"
      
    } else { # Approximate score provided, weighted average over all observations (matched by age and sex).
      df = df %>% mutate(tmp = list(my_df),
                    data = pmap(list(X_age, X_male, tmp), function(x, y, z) filter(z, X_age == x & X_male == y)),
                    Epan = map2(X_U5, data, function(x, y) fun_Epan(target = x, values = y$X_U5, bwidth = bwidth)),
                    Output = map2_dbl(data, Epan, function(x, y) weighted.mean(x = x$Output, w = y))) %>%
        select(-c(tmp, data, Epan)) %>% unnest_legacy()
    }
  } else my_out = "Incorrect value for summary, must be 0 or 1"
  return(df) 
}

# Examples of how to use - need the files "eq5dmapUK - data1.csv" and "eq5dmapUK - data2.csv" in the same folder)
EQdata1 <- read.csv("eq5dmapUK - data1.csv", fileEncoding="UTF-8-BOM")

#run example on  5L descriptive 
fun_map("age", "male", input = c("Y5_1", "Y5_2", "Y5_3", "Y5_4", "Y5_5"), EQdata1, summary = 0)  

#run example on 5L exact utility score
EQdata2 = read.csv("eq5dmapUK - data2.csv", fileEncoding="UTF-8-BOM")

fun_map("age", "male", input = "util5L", EQdata2, summary = 1, bwidth =0.0001)  # From a 5L utility score

# run example on line 3 5L approx utility score
fun_map("age", "male", input = "util5L", EQdata2, summary = 1, bwidth =0.1)  # From a summary 5L utility score
