
/* Use the semicolon as command delimiter: from the next line on, a command ends at a semicolon rather than at a line break. */
#delimit ;
/* Enable the pause command (no pause call is visible in this section of the file). */
pause on;

/* Make the directory stored in the global macro global_outputs the working directory. */
cd "$global_outputs";

/********************************************************************************************
3.1. Final cleaning of New comprehensive data variables
********************************************************************************************/

use "$input_new_comprehensive", clear;

/* ---------- Add/drop variables ---------- */
/* The country variable in memory is discarded before add_vars.dta is merged in
   on idstd; only observations present in both files are kept. The code below
   uses country again, so add_vars.dta is expected to supply it. */
drop country;
merge 1:1 idstd using "$global_inputs_replication\\add_vars.dta", keep(match) nogenerate;

/* Rename the two Slovak survey identifiers. */
replace country = "Slovak Republic2013" if country == "Slovakia2013";
replace country = "Slovak Republic2019" if country == "Slovakia2019";

/* year = last four characters of the country identifier, converted to numeric. */
gen year = substr(country, -4, .);
destring year, replace;

/* ---------- Indicators are run only on surveys from 2009 onward ---------- */
drop if year < 2009;

/* Bring in fieldwork.dta by country. With update replace, values from the using
   file overwrite the values in memory. Observations that exist only in
   fieldwork.dta are discarded. */
merge m:1 country using "$global_outputs\\fieldwork.dta", update replace;
drop if _merge == 2;
drop _merge;

/* country2 = the part of country that precedes the first character 2, trimmed. */
egen country2 = ends(country), punct(2) trim head;
replace country_abr = "NER" if country2 == "Niger";
gen wbcode = country_abr;

/* Must stay after the fieldwork merge and the country2 extraction above, which
   both still see the identifier Somalia2019. */
replace country = "Somalia (Bosaso and Mogadishu)2019" if country == "Somalia2019"; // added by Andrea on 4/22/2021, from new3

/* Remove any variables whose name starts with isic; no error if there are none. */
cap drop isic*;

/* ownership: 1 when b2b is 10 or more, 2 when b2b is in [0, 10).
   !missing() is used instead of comparing against the system missing value
   only: extended missing values (.a to .z) are also larger than every number
   and are not equal to the system missing value, so they would otherwise be
   coded as 1. */
gen ownership = 1 if b2b >= 10 & !missing(b2b);
replace ownership = 2 if b2b < 10 & b2b >= 0;
/* Pakistan2013 only: negative b2b codes are also assigned to category 2. */
replace ownership = 2 if country == "Pakistan2013" & b2b < 0;

/* exporter: 1 when d3c is in [10, 100], 2 when d3c is in [0, 10).
   The upper bound of 100 already excludes every kind of missing value. */
gen exporter = 1 if d3c >= 10 & d3c <= 100;
replace exporter = 2 if d3c < 10 & d3c >= 0;
/* Pakistan2013 only: negative d3c codes are also assigned to category 2. */
replace exporter = 2 if country == "Pakistan2013" & d3c < 0;

/* Value clean-up, 18 Jan 2011, Minh. The original notes attach each of these
   recodes to Indonesia2009, but none of them is restricted by country, so they
   apply to every observation in memory. */
foreach v in e11 e30 {;
	replace `v' = . if `v' == 9;
};
replace g4 = . if g4 == 8;
replace k4 = 2 if k4 == 20;

/* Fill a system-missing c9a with c9b as a percentage of d2, but only when the
   ratio of c9b to d2 does not exceed 1 (hard maximum of 100 percent). A zero or
   missing d2, or a missing c9b, gives a missing ratio, which fails the test, so
   those rows are left untouched.
   Moved here by Nona on May 10, 2024, after consultation with Jorge, so that the
   combination and not its components goes through the outlier step. */
replace c9a = (c9b/d2)*100 if c9a == . & c9b >= 0 & d2 >= 0 & c9b/d2 <= 1;

/* Blank c8 wherever c8b carries the code -9.
   Added by Nona on May 13, 2024, after consultations with Jorge and Josh. */
replace c8 = . if c8b == -9;

/* Build s from l1, l6, l8 and l1a (formulas updated by Nona on August 4, 2017):
     - l1 + l6*(l8/12) when l1, l6 and l8 are all non-negative and non-missing;
     - l1 + l6*(1/3)   when l8 is negative or missing but l1 and l6 are usable;
     - l1              when only l1 is usable;
     - plus half of l1a whenever l1a is non-negative and non-missing.
   missing() is used instead of comparisons with the system missing value so that
   extended missing values (.a to .z) are treated as missing too. Previously an
   extended-missing l1a turned a valid s into missing, and an extended-missing l8
   skipped the 1/3 fallback and fell through to s = l1. */
capture drop s;
gen s = l1 + l6*(l8/12) if l1 >= 0 & l6 >= 0 & l8 >= 0 & !missing(l1, l6, l8);
replace s = l1 + l6*(1/3) if l1 >= 0 & l6 >= 0 & (l8 < 0 | missing(l8)) & !missing(l1, l6);
replace s = l1 if l1 >= 0 & !missing(l1) & missing(s);
replace s = s + 0.5*l1a if l1a >= 0 & !missing(l1a); // added on April 27, 2023 by Nona with clearance from Josh and Jorge;
*** NOTE THE ABOVE FORMULA HAS TO BE REPEATED FOR WK14 BELOW, AFTER OUTLIERS ARE REMOVED *** ; 


/******************* ELIMINATE NEGATIVES ************************/
/* This section replaces negative values with missing, with two kinds of
   exception:
     1. selected codes are parked in extended missing values (.c, .d, .e) before
        the sweep and converted back to a negative code afterwards;
     2. -8 becomes .a for a fixed list of variables, and -9 becomes .b for j6a.
   Change history, all by Nona: r10 (May 23, 2023); k-variable list (June 15,
   2023); e2b, e2b_BR, e2b_ESBR, e33, e33_BR, d40b, e31a (June 27, 2024); d4, d4a,
   d14, d14a, j35b, d33b, d33b_BR, d40b_BR (May 22, 2025); d4b, d14b, l38b
   (Sept 18, 2025); k36, k40 (Sept 30, 2025). */

/* ---- 1. Park the codes that must survive the sweep ---- */
foreach var in r10 d4 d4a d14 d14a {;
	replace `var' = .c if `var' == -7;
};
foreach var in e2b e2b_BR e2b_ESBR {;
	replace `var' = .c if `var' == -4;
};
foreach var in e33 e33_BR {;
	replace `var' = .c if `var' == -7;
	replace `var' = .d if `var' == -6;
};
foreach var in d4b d14b l38b {;
	replace `var' = .c if `var' == -9;
};
foreach var in k36 k40 {;
	replace `var' = .d if `var' == -6;
	replace `var' = .e if `var' == -5;
};
/* Every negative value of the variables below becomes .c. The k-variables keep
   .c permanently. d40b, d40b_BR, e31a, j35b, d33b and d33b_BR are converted back
   to -9 in step 3, so for those six any negative code ends up as -9.
   NOTE(review): confirm that collapsing all of their negative codes into -9 is
   intended. */
foreach var in k3a k5a k3bc k3e k3f k3hd k5bc k5e k5f k5hdj k5i k16 k17 k20a1 k4 d40b d40b_BR e31a j35b d33b d33b_BR {;
	replace `var' = .c if `var' < 0;
};
/* j6a: -9 becomes .b. This recode used to sit after the first sweep, where it
   could never match because the sweep had already turned every negative other
   than -8 into the system missing value. It now runs first so that it takes
   effect. */
replace j6a = .b if j6a == -9;

/* ---- 2. Sweep ---- */
/* The delimiter directive below re-asserts the semicolon delimiter, which is
   already active. ds3 is a user-written command; it is used here to list the
   variables that contain at least one negative value. */
# delimit;
ds3, any (X<0);
/* First pass: blank all negatives except -8. */
for var `r(varlist)': replace X=. if X<0&X!=-8;
/* -8 becomes .a for the listed variables (added by Minh 7 Dec 2010). */
for var j5 j7a j7b j6 j6a j12 j15 c21 c14 c5 g4: replace X=.a if X==-8;
/* Second pass: blank whatever negatives remain, i.e. -8 in all other variables. */
ds3, any (X<0);
for var `r(varlist)': replace X=. if X<0;

/* ---- 3. Convert the parked codes back to their negative values ---- */
foreach var in r10 d4 d4a d14 d14a {;
	replace `var' = -7 if `var' == .c;
};
foreach var in d40b d40b_BR e31a j35b d33b d33b_BR d4b d14b l38b {;
	replace `var' = -9 if `var' == .c;
};
foreach var in e2b e2b_BR e2b_ESBR {;
	replace `var' = -4 if `var' == .c;
};
foreach var in e33 e33_BR {;
	replace `var' = -7 if `var' == .c;
	replace `var' = -6 if `var' == .d;
};
foreach var in k36 k40 {;
	replace `var' = -6 if `var' == .d;
	replace `var' = -5 if `var' == .e;
};

/******************** Industry and sector ***********************/
/* sector2: numeric, value-labelled copy of the string variable sector_MS. */
encode sector_MS, gen(sector2);

/* Collapse the industry codes into broader groups. recode matches each rule
   against the original value, so 26 -> 15 is not passed on to the rule that
   sends 15 to 23. The variable is then renamed to sector and labelled. */
recode industry
	(6 13 14 27 = 16)
	(4 10 25 28 99 = 30)
	(15 17 18 19 20 23 24 = 23)
	(26 = 15);
rename industry sector;

label define industlabel2
	1  "Textiles"
	2  "Leather"
	3  "Garments"
	5  "Food"
	7  "Metals and machinery"
	8  "Electronics"
	9  "Chemicals and pharmaceuticals"
	11 "Wood and furniture"
	12 "Non-metallic and plastic materials"
	15 "Auto and auto components"
	16 "Other manufacturing"
	21 "Retail and wholesale trade"
	22 "Hotels and restaurants"
	23 "Other services"
	30 "Other: Construction, Transportation, etc";
label values sector industlabel2;