********************************************************************************
************* Comparison: Survey Sample vs. German Population/Orbis ************
********************************************************************************

* Session setup: disable the -more- pause so the script runs through without
* waiting for a key press, and start from an empty session.
set more off
clear all
********************************************************************************

********************************************************************************
/*
Read me:

In this script, we compare our sample of survey firms against two different 
populations:

(1) German firm population
(2) Universe of Orbis firms not participating in the survey
*/
********************************************************************************


*****************
*** Paths
*****************
* Input and output folders, given relative to the current working directory
* (the script has to be run from the folder that contains Data and Tables).
* Forward slashes are used on purpose: Stata accepts them on Windows, macOS
* and Unix, whereas backslashes only work on Windows and are treated as an
* escape character when they directly precede a macro reference.
global path_data_in "./Data/"
global tables "./Tables/"



********************************************************************************
************* Comparison (1): Survey Sample vs. German Population (2019) *******
********************************************************************************


*** Load the prepared survey data set
use "${path_data_in}survey_data_prepped_with_weights", clear


*** Declare the survey design: weight_web serves as the probability weight
svyset [pweight = weight_web]

*** Sanity check: total of the sampling weights
summarize weight_web
display r(sum)

*** Test: Revenues -> Numerical values
* Unweighted mean first, then the design-weighted mean for comparison.
mean rev
svy: mean rev

* Scale the weighted mean (coefficient on rev from the last estimation) up
* to an aggregate figure.
* NOTE(review): 3559197 is presumably the number of firms in the German
* population, i.e. what the weights should sum to - confirm against the
* weight total displayed above.
display "Weighted revenues (continous variable):"
display 3559197 * _b[rev]



********************************************************************************
************************
*** Comparison employees
************************
* Collapse the detailed employee categories (num_emp_cat) into four size
* classes; observations matching none of the rules stay missing.
gen empl_comp = .
replace empl_comp = 1 if num_emp_cat <= 3                  // 0-9 employees
replace empl_comp = 2 if inlist(num_emp_cat, 4, 5)         // 10-49 employees
replace empl_comp = 3 if num_emp_cat == 6                  // 50-249 employees
replace empl_comp = 4 if inlist(num_emp_cat, 7, 8, 9)      // 250 and more employees

label define l_employees ///
    1 "0-9 employees" ///
    2 "10-49 employees" ///
    3 "50-249 employees" ///
    4 "250 and more employees"
label values empl_comp l_employees


*** Compare weighted and unweighted results

* Unweighted shares: the first row of r(table) holds the point estimates,
* one column per size class.
proportion empl_comp
forvalues k = 1/4 {
    local empl_`k'_nw = r(table)[1, `k']
}

* Number of observations used in the unweighted estimation
local N = e(N)


* Weighted shares (using the survey design declared via svyset)
svy: proportion empl_comp
forvalues k = 1/4 {
    local empl_`k'_w = r(table)[1, `k']
}


* Table body: unweighted share, weighted share, population share (typed in
* by hand; the table column is titled "Population (2019)").
matrix T = [ ///
`empl_1_nw', `empl_1_w', 0.874 \ ///
`empl_2_nw', `empl_2_w', 0.101 \ ///
`empl_3_nw', `empl_3_w', 0.021 \ ///
`empl_4_nw', `empl_4_w', 0.005 ///
]
* Bottom row: sample size in the first column only
matrix TN = [`N', ., .]

matrix list T


* Build the formatted table, append the N row, then export it as LaTeX
frmttable, statmat(T) sdec(3) ///
    ctitles("", "Unweighted Sample", "Weighted Sample", "Population (2019)") ///
    rtitles("0-9 employees" \ "10-49 employees" \ "50-249 employees" \ "250 and more employees")

frmttable, statmat(TN) sdec(0) rtitle("\(N\)") append

frmttable using "${tables}comparison_sample_pop_employees", replace tex nocenter plain
********************************************************************************



********************************************************************************
************************
*** Comparison revenues
************************
* Collapse the detailed revenue categories (rev_cat) into four classes;
* observations matching none of the rules stay missing.
gen rev_comp = .
replace rev_comp = 1 if rev_cat <= 5                       // Less than 2 mn.
replace rev_comp = 2 if inlist(rev_cat, 6, 7, 8)           // 2-10 mn.
replace rev_comp = 3 if inlist(rev_cat, 9, 10, 11, 12)     // 10-50 mn.
replace rev_comp = 4 if inlist(rev_cat, 13, 14)            // 50 mn. or more

label define l_revenues ///
    1 "Less than 2 mn." ///
    2 "2-10 mn." ///
    3 "10-50 mn." ///
    4 "50 mn. or more"
label values rev_comp l_revenues


*** Compare weighted and unweighted results

* Unweighted shares: the first row of r(table) holds the point estimates,
* one column per revenue class.
proportion rev_comp
forvalues k = 1/4 {
    local rev_`k'_nw = r(table)[1, `k']
}

* Number of observations used in the unweighted estimation
local N = e(N)


* Weighted shares (using the survey design declared via svyset)
svy: proportion rev_comp
forvalues k = 1/4 {
    local rev_`k'_w = r(table)[1, `k']
}


* Table body: unweighted share, weighted share, population share (typed in
* by hand; the table column is titled "Population (2019)").
matrix T = [ ///
`rev_1_nw', `rev_1_w', 0.932 \ ///
`rev_2_nw', `rev_2_w', 0.051 \ ///
`rev_3_nw', `rev_3_w', 0.013 \ ///
`rev_4_nw', `rev_4_w', 0.004 ///
]
* Bottom row: sample size in the first column only
matrix TN = [`N', ., .]

matrix list T


* Build the formatted table, append the N row, then export it as LaTeX
frmttable, statmat(T) sdec(3) ///
    ctitles("", "Unweighted Sample", "Weighted Sample", "Population (2019)") ///
    rtitles("Less than 2 mn." \ "2-10 mn." \ "10-50 mn." \ "50 mn. or more")

frmttable, statmat(TN) sdec(0) rtitle("\(N\)") append

frmttable using "${tables}comparison_sample_pop_revenues", replace tex nocenter plain
********************************************************************************




********************************************************************************
************************
*** Comparison sectors
************************

* Copy the one-letter WZ08 industry code and attach its value label.
* NOTE(review): this assumes a value label that carries the same name as the
* variable (industry_WZ08_1_letter) - verify in the prepared data.
gen sector_comp = industry_WZ08_1_letter
label values sector_comp industry_WZ08_1_letter


*** Compare weighted and unweighted results

* The population shares below are typed in by hand for exactly 17 sectors
* (B to S, in that order). The estimated shares are matched to them purely by
* column position, so a sector that is absent from the estimation sample
* would silently shift every later row. Stop with an error instead.
local n_sectors = 17

* Unweighted shares: first row of r(table) holds the point estimates
proportion sector_comp
matrix prop_nw = r(table)
if colsof(prop_nw) != `n_sectors' {
    display as error "sector_comp: expected `n_sectors' sectors in the unweighted estimation, found " colsof(prop_nw)
    exit 459
}
forvalues k = 1/`n_sectors' {
    local sec_`k'_nw = prop_nw[1, `k']
}

* Number of observations used in the unweighted estimation
local N = e(N)

* Weighted shares (using the survey design declared via svyset)
svy: proportion sector_comp
matrix prop_w = r(table)
if colsof(prop_w) != `n_sectors' {
    display as error "sector_comp: expected `n_sectors' sectors in the weighted estimation, found " colsof(prop_w)
    exit 459
}
forvalues k = 1/`n_sectors' {
    local sec_`k'_w = prop_w[1, `k']
}


* Table body: unweighted share, weighted share, population share
matrix T = [ ///
`sec_1_nw', `sec_1_w', 0.000568668 \ /// /*B - Mining and quarrying*/
`sec_2_nw', `sec_2_w', 0.064262529 \ /// /*C - Manufacturing*/
`sec_3_nw', `sec_3_w', 0.022440736 \ /// /*D - Energy supply*/
`sec_4_nw', `sec_4_w', 0.003252981 \ /// /*E - Water supply*/
`sec_5_nw', `sec_5_w', 0.109952891 \ /// /*F - Construction*/
`sec_6_nw', `sec_6_w', 0.171213057 \ /// /*G - Trade*/
`sec_7_nw', `sec_7_w', 0.032282001 \ /// /*H - Transport and storage*/
`sec_8_nw', `sec_8_w', 0.071435214 \ /// /*I - Hospitality*/
`sec_9_nw', `sec_9_w', 0.038633433 \ /// /*J - Information and communication*/
`sec_10_nw', `sec_10_w', 0.021298343 \ /// /*K - Provision of financial and insurance services*/
`sec_11_nw', `sec_11_w', 0.053147381 \ /// /*L - Real estate and housing*/
`sec_12_nw', `sec_12_w', 0.149879313 \ /// /*M - Provision of freelance, scientific and technical services*/
`sec_13_nw', `sec_13_w', 0.064458921 \ /// /*N - Provision of other commercial services*/
`sec_14_nw', `sec_14_w', 0.02263741 \ /// /*P - Education and teaching*/
`sec_15_nw', `sec_15_w', 0.071339125 \ /// /*Q - Health and social services*/
`sec_16_nw', `sec_16_w', 0.033955412 \ /// /*R - Art, entertainment and recreation*/
`sec_17_nw', `sec_17_w', 0.069242585 /// /*S - Provision of other services*/
]

* Bottom row: sample size in the first column only
matrix TN = [`N', ., .]
matrix list T


* Build the formatted table, append the N row, then export it as LaTeX
frmttable, statmat(T) sdec(3) ///
ctitles("", "Unweighted Sample", "Weighted Sample", "Population (2019)") ///
rtitles("B - Mining and quarrying" \ "C - Manufacturing" \ "D - Energy supply" \ ///
"E - Water supply" \ "F - Construction" \ "G - Trade" \ "H - Transport and storage" \ ///
"I - Hospitality" \ "J - Information and communication" \ "K - Provision of financial and insurance services" \ ///
"L - Real estate and housing" \ "M - Provision of freelance, scientific and technical services" \ ///
"N - Provision of other commercial services" \ "P - Education and teaching" \ ///
"Q - Health and social services" \ "R - Art, entertainment and recreation" \ ///
"S - Provision of other services")


frmttable, statmat(TN) sdec(0) rtitle("\(N\)") append

frmttable using "${tables}comparison_sample_pop_sector", replace tex nocenter plain
********************************************************************************


