
# Load data ---------------------------------------------------------------

# Orbis financial data
financials <- open_dataset("Data/financials_cleaned.parquet") |>
  collect()

# The join further down uses data.table's `X[Y, on = ...]` syntax, which only
# works when `financials` is a data.table. `collect()` does not promise that
# class, so convert by reference here (a no-op if it already is one).
setDT(financials)

setnames(financials, old = "number_of_employees", new = "num_emp")

# Survey data
survey <- open_dataset("Data/survey_data_prepped.parquet") |>
  # Can only use observations with agreement to data link
  filter(cdat3 == 1L) |>
  collect()

setDT(survey, key = "BvDID")

# The survey carries its own `num_emp`; rename it so it does not clash with
# the Orbis employee count after the merge.
setnames(survey, old = "num_emp", new = "num_emp_survey")

# Keep every survey row and attach the matching rows from `financials`; the
# join column is named `bvd_id_number` in the result. Rows whose identifier is
# missing are dropped afterwards.
dt_merged <- financials[survey, on = c("bvd_id_number" = "BvDID")] |>
  _[!is.na(bvd_id_number)]


# Proxy Test: Do firms actually decrease their employees ------------------


# Independent variable: Dummy for negative employee change (short-term)
survey[, table(ccm2)]

# Interval from `RecordedDate` to `closing_date`, and its length in months
dt_merged[, d_dates := RecordedDate %--% closing_date]

dt_merged[, d_months := time_length(d_dates, unit = "month")]

# Outcome: employment change next year
# NOTE(review): shift() is positional. The leads below assume that, within
# each firm, rows are in ascending fiscal-year order with exactly one row per
# year -- confirm against the input data.
dt_merged[, d_emp := (shift(num_emp, type = "lead") - num_emp) / num_emp,
          by = "bvd_id_number"]
dt_merged[, d_emp2 := (shift(num_emp, 2, type = "lead") - num_emp) / num_emp,
          by = "bvd_id_number"]
# NOTE(review): d_emp is the relative change to the next row and d_emp2 the
# relative change to the row after that, both relative to the current row, so
# this sum double-counts the first step -- confirm this is intended.
dt_merged[, d_emp3 := d_emp + d_emp2]

# Direction dummies and log employment.
# The whole call is grouped by firm: the rolling sum and the leads look at
# the following row, so without `by` the last row of one firm would silently
# pick up values from the first row of the next firm. Within a firm, rows with
# no following observation now get NA instead.
dt_merged[, `:=`(
  pos_change = d_emp > 0,
  neg_change = d_emp < 0,
  pos_change2 = frollsum(d_emp, align = "left", n = 2, na.rm = TRUE) > 0,
  neg_change2 = frollsum(d_emp, align = "left", n = 2, na.rm = TRUE) < 0,
  l_emp_l = log(shift(num_emp, type = "lead")),
  l_emp = log(num_emp),
  l_change = log(shift(num_emp, type = "lead")) - log(num_emp)
), by = "bvd_id_number"]


# Now: Focus on relevant period

# Restrict the merged panel to fiscal years 2019 and 2020
dt_sub <- dt_merged[fyear %in% c(2019, 2020)]

dt_reg <- dt_sub[]

dt_reg[, table(size_rev)]

# One `lm_robust` fit per outcome; all three share the same right-hand side
# (ccm2, chi1, log total assets and sector). The list names become the
# column headers of the regression table.
model_list <- lapply(
  c(
    "Perc. Change" = "d_emp",
    "Pos. Change" = "pos_change",
    "Neg. Change" = "neg_change"
  ),
  \(outcome) lm_robust(
    reformulate(
      c("ccm2", "chi1", "log(total_assets)", "sector"),
      response = outcome
    ),
    data = dt_reg
  )
)


# One-row table for the bottom of the regression output: a label column
# followed by the mean of each outcome in `dt_sub` (missing values ignored),
# rounded to two decimals.
sample_means <- dt_sub[, c(
  list(desc = "Sample Means"),
  lapply(
    list(
      mean_perc_change = d_emp,
      mean_pos_change = pos_change,
      mean_neg_change = neg_change
    ),
    \(x) round(mean(x, na.rm = TRUE), 2)
  )
)]

# Switch off the `modelsummary_stars_note` option before building the table
options(modelsummary_stars_note = FALSE)

# Render the three models as a gt table and write it out as a .tex file.
# Only the ccm2 / chi1 coefficients are shown; the sample means are appended
# as an extra row. The title is wrapped in `html()` and carries a LaTeX
# \label for cross-referencing.
gtsave(
  msummary(
    model_list,
    gof_map = c("nobs", "adj.r.squared"),
    coef_map = c(
      "ccm2" = "Reduce Employment",
      "chi1" = "Increase Employment"
    ),
    add_rows = sample_means,
    output = "gt",
    title = html("Proxy Test COVID. \\label{tab:proxy}"),
    escape = FALSE
  ),
  filename = "Tables/proxy_test_covid.tex"
)

# Drop the objects created for the proxy test, then run the garbage collector
rm(list = c(
  "financials", "survey", "dt_merged", "dt_sub",
  "model_list", "dt_reg", "sample_means"
))
gc()