﻿* Encoding: UTF-8.
* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
* Ethiopia (Addis Ababa + Somali) PPS sampling.
* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

* Folder handles: SampPath holds the sample workbook and receives the weight file, DataPath holds the raw survey files.
FILE HANDLE SampPath /NAME = "D:\Fafo\Data\Ethiopia_Uganda_2021\ETH\Sample".
FILE HANDLE DataPath /NAME = "D:\Fafo\Data\Ethiopia_Uganda_2021\ETH\Survey1\Raw\AddisAbaba+Somali_PPS".

* 4.1 Load data.

* 4.1.1 Sample data.

* Read the Initial clusters sheet of the sample workbook; the first row supplies the variable names.
GET DATA
  /TYPE=XLSX
  /FILE='SampPath\Sample information for weigh calculations-Addis Ababa+Somila_PPS.xlsx'
  /SHEET=name 'Initial clusters'
  /CELLRANGE=FULL
  /READNAMES=ON
  /LEADINGSPACES IGNORE=YES
  /TRAILINGSPACES IGNORE=YES
  /DATATYPEMIN PERCENTAGE=95.0
  /HIDDEN IGNORE=YES.
dataset name Samp.

* Drop the sheet columns that are not used further down; every variable is named exactly once in the list.
delete variables GEOCode1 GEOName1 GEOCode2 GEOName2 GEOCode3 GEOName3 GEOCode4 GEOName4 GEOCode5 GEOName5 GEOCode6 GEOName6
    Otherinformationofcluster Urbanorrural1urban2rural
    Campornoncamp1camp2noncamp Country1Ethiopia2Uganda NumberofEAsinStratum.

* Give the remaining sheet columns the short names used in the rest of this script.
rename variables
    ( ClusterID = QHCLUSTER )
    ( ActualClusterID = InitCluster )
    ( NumberofselectedEAs = ClusSelStra )
    ( Numberofhhsinstratum = HHStra )
    ( NumberofhhsBEFOREListing = HHClusBe )
    ( NumberofnationalhhsAFTERlisting = HHClusList_Nat )
    ( Samplesizeofselectednationalhhs = HHSelClus_Nat )
    ( Numberofrefugeesfromlisting = HHClusList_Ref )
    ( Samplesizeofselectedrefugeehhs = HHSelClus_Ref ).

* Collapse the sample sheet to one row per initial cluster, keeping the first value of every field.
dataset declare Samp2.
dataset activate Samp.
aggregate outfile = Samp2
    /break = InitCluster
    /Stratum = first( Stratum )
    /ClusSelStra = first( ClusSelStra )
    /HHStra = first( HHStra )
    /HHClusBe = first( HHClusBe )
    /HHClusList_Nat = first( HHClusList_Nat )
    /HHClusList_Ref = first( HHClusList_Ref )
    /HHSelClus_Nat = first( HHSelClus_Nat )
    /HHSelClus_Ref = first( HHSelClus_Ref ).

* Make Samp2 the active dataset before closing Samp, so that the sort below is applied to Samp2.
* Closing a dataset while it is active only removes its name and leaves it active.
dataset activate Samp2.
dataset close Samp.

sort cases by InitCluster.

* 4.1.2 Survey data.

* H_MAIN file: keep only the listed fields and sort on the merge keys.
GET FILE = "DataPath\H_MAIN.sav"
    /KEEP = QHCLUSTER QHID QHRESULT QHMEMBER QHRSIELIG QHRSI QHTYPE QHSTOP LS515 QHGEOCODE1.
DATASET NAME Main.
SORT CASES BY QHCLUSTER QHID.

* I_MAIN file: its cluster and household IDs are renamed to the H_MAIN names so both files share the merge keys.
GET FILE = "DataPath\I_MAIN.sav"
    /KEEP = QICLUSTER QIHID QIIDV QIRESULT QITYPE QISTOP SF1111 QIRESULTO
    /RENAME = ( QICLUSTER = QHCLUSTER ) ( QIHID = QHID ).
DATASET NAME IDV.
SORT CASES BY QHCLUSTER QHID QIIDV.

* Join the two files on cluster and household ID; the result is the working dataset Main2.
MATCH FILES
    /FILE = Main
    /FILE = IDV
    /BY QHCLUSTER QHID.
EXECUTE.
DATASET NAME Main2.
DATASET CLOSE Main.
DATASET CLOSE IDV.

DATASET ACTIVATE Main2.

* Delete duplicate cases.
* The household IDs listed for each cluster are flagged with delcase = 1; only unflagged cases are kept.

DO IF QHCLUSTER = 1087.
    IF range( QHID, 4067, 4070, 4072, 4085 ) delcase = 1.
ELSE IF QHCLUSTER = 1088.
    IF any( QHID, 4086, 4087, 4088, 4089, 4090, 4092, 4094, 4095, 4096, 4097, 4098 ) delcase = 1.
ELSE IF QHCLUSTER = 1114.
    IF range( QHID, 4182, 4213 ) delcase = 1.
ELSE IF QHCLUSTER = 1128.
    IF range( QHID, 4266, 4306 ) delcase = 1.
ELSE IF QHCLUSTER = 1142.
    IF range( QHID, 4393, 4397 ) OR any( QHID, 4400, 4403, 4404, 4408, 4409, 4410, 4411, 4415, 4416, 4417 ) delcase = 1.
ELSE IF QHCLUSTER = 1144.
    IF range( QHID, 4433, 4441 ) delcase = 1.
ELSE IF QHCLUSTER = 911263.
    IF range( QHID, 5507, 5523 ) delcase = 1.
END IF.
SELECT IF missing( delcase ).

* Change cluster ID.
* InitCluster is the cluster ID used for matching with the sample file.
* Field-work IDs that are not listed below are carried over unchanged.

COMPUTE InitCluster = QHCLUSTER.

* Listed six-digit IDs ending in 11: InitCluster is the first four digits.
IF any( QHCLUSTER, 108711, 108811, 111411, 112811, 114211, 114411 ) InitCluster = ( QHCLUSTER - 11 ) / 100.

* Listed six-digit IDs starting with 9: InitCluster is the last five digits.
IF any( QHCLUSTER,
    910871, 910872, 910873, 910874, 910875,
    910881, 910882, 910883, 910884, 910885, 910886, 910887,
    910991, 910992, 910993, 910994, 910995, 910996, 910997,
    911101, 911102, 911103, 911104, 911105, 911106, 911107,
    911131, 911132, 911133, 911134, 911135, 911136, 911137,
    911141, 911142, 911143, 911144, 911145, 911146,
    911261, 911262, 911263, 911264,
    911282, 911283, 911284, 911285,
    911291, 911292, 911293, 911294, 911295, 911296, 911297,
    911353, 911354, 911355, 911357, 911358,
    911361, 911362, 911363, 911364,
    911422, 911423, 911424, 911425,
    911431, 911432, 911433, 911434, 911435 ) InitCluster = QHCLUSTER - 900000.

* The single seven-digit ID shares its InitCluster with 911263.
IF QHCLUSTER = 9112631 InitCluster = 11263.

SORT CASES BY InitCluster.

* 4.2 Check sample file with survey data.

DATASET ACTIVATE Main2.

* Change HH type due to note from Tewodros.
* The households listed per cluster get TypeChange = 1 and are then set to QHTYPE = 2.

IF QHCLUSTER = 5005 AND QHID = 2001 TypeChange = 1.
IF QHCLUSTER = 5018 AND any( QHID, 2002, 2003 ) TypeChange = 1.
IF QHCLUSTER = 5025 AND any( QHID, 2004, 2005 ) TypeChange = 1.
IF QHCLUSTER = 5029 AND QHID = 2006 TypeChange = 1.
IF QHCLUSTER = 5036 AND any( QHID, 2007, 2008 ) TypeChange = 1.
IF QHCLUSTER = 5043 AND any( QHID, 2009, 2010 ) TypeChange = 1.
IF QHCLUSTER = 5050 AND QHID = 4001 TypeChange = 1.
IF QHCLUSTER = 5051 AND any( QHID, 4002, 4003, 4004, 4005 ) TypeChange = 1.
IF QHCLUSTER = 5066 AND any( QHID, 4006, 4007 ) TypeChange = 1.
IF QHCLUSTER = 5067 AND any( QHID, 4008, 4009, 4010, 4011, 4012, 4013 ) TypeChange = 1.
IF QHCLUSTER = 5069 AND any( QHID, 4014, 4015, 4016, 4017, 4018 ) TypeChange = 1.
IF QHCLUSTER = 5073 AND any( QHID, 4064, 4065, 4066, 4067 ) TypeChange = 1.

IF TypeChange = 1 QHTYPE = 2.

* Indicators by household type: QHTYPE 1 feeds HH_national and QHTYPE 2 feeds HH_refugee.
COMPUTE HH_national = ( QHTYPE = 1 ).
COMPUTE HH_refugee = ( QHTYPE = 2 ).

* Count the national and refugee households per initial cluster in the survey data.
DATASET DECLARE check.
AGGREGATE OUTFILE = check
    /BREAK = InitCluster
    /Tot_N = SUM( HH_national )
    /Tot_R = SUM( HH_refugee ).

* Put the survey counts next to the sample information of the same cluster.
MATCH FILES
    /FILE = Samp2
    /FILE = check
    /BY InitCluster.
EXECUTE.
DATASET NAME Samp3.
DATASET CLOSE Samp2.
DATASET CLOSE check.

* Keep only the rows that carry a Stratum value.
DATASET ACTIVATE Samp3.
SELECT IF NOT missing( Stratum ).

* 4.3 Inclusion Probability.

* 4.3.1 PSU.

DATASET ACTIVATE Samp3.

* Ppsu = households in the cluster before listing * clusters selected in the stratum / households in the stratum.
COMPUTE Ppsu = HHClusBe * ClusSelStra / HHStra.

* 4.3.2 Household.

* Within-cluster probability = selected households / listed households, per household type.
* It stays system-missing when nothing was listed or when the number of selected households is 0.
IF HHClusList_Nat > 0 AND HHSelClus_Nat <> 0 Phc_Nat = HHSelClus_Nat / HHClusList_Nat.
IF HHClusList_Ref > 0 AND HHSelClus_Ref <> 0 Phc_Ref = HHSelClus_Ref / HHClusList_Ref.

* For clusters where national and refugee households were not selected separately.
* Both types get the pooled ratio of all selected households to all listed households.
DO IF any( InitCluster, 5076, 5077, 5078, 5079, 5080, 5081, 5082, 5083, 5084 ).
    COMPUTE Phc_Nat = ( HHSelClus_Nat + HHSelClus_Ref  ) / ( HHClusList_Nat + HHClusList_Ref ).
    COMPUTE Phc_Ref = Phc_Nat.
END IF.

* Overall household probability = cluster probability * within-cluster probability.
COMPUTE Ph_Nat = Ppsu * Phc_Nat.
COMPUTE Ph_Ref = Ppsu * Phc_Ref.

* 4.3.3 RSI.

DATASET ACTIVATE Main2.

* Prsi is the reciprocal of QHRSIELIG; it stays system-missing when QHRSIELIG is 0, negative or missing.
DO IF QHRSIELIG > 0.
    COMPUTE Prsi = 1 / QHRSIELIG.
END IF.

* 4.4 Weights.

* 4.4.1 Cluster non-response corrector.

* Builds ClusNRcorrector, one value per stratum, in the helper dataset ClusNo.
dataset activate Samp3.

* ClusNo = 1 marks clusters that have no listing figure for either household type.
compute ClusNo = 0.
if missing( HHClusList_Nat ) and missing( HHClusList_Ref ) ClusNo = 1.

* Sum the pre-listing household counts per stratum, separately for ClusNo = 0 and ClusNo = 1.
dataset declare ClusNo.
aggregate outfile = ClusNo
    /break = Stratum ClusNo
    /HHClusBe = sum( HHClusBe ).

* Restructure to one row per stratum with one HHClusBe column per ClusNo value.
* The names HHClusBe..00 and HHClusBe.1.00 used below are presumably generated from the ClusNo values 0 and 1 - confirm if the format of ClusNo changes.
dataset activate ClusNo.
sort cases by Stratum ClusNo.
CASESTOVARS
  /ID = Stratum
  /INDEX = ClusNo
  /GROUPBY = VARIABLE.
* A stratum without ClusNo = 1 clusters gets 0 for that sum.
* NOTE(review): HHClusBe.1.00 is referenced unconditionally; presumably it only exists when at least one cluster has ClusNo = 1 - confirm.
if missing( HHClusBe.1.00 ) HHClusBe.1.00 = 0.
* Corrector = 1 / ( sum for ClusNo 0 / ( sum for ClusNo 0 + sum for ClusNo 1 ) ).
compute ClusNRcorrector = 1 / ( HHClusBe..00 / ( HHClusBe..00 + HHClusBe.1.00 ) ).
exe.
delete variables HHClusBe..00 HHClusBe.1.00.

* Attach the stratum-level corrector to every cluster row (table lookup on Stratum).
DATASET ACTIVATE Samp3.
SORT CASES BY Stratum InitCluster.

MATCH FILES
    /TABLE = ClusNo
    /FILE = Samp3
    /BY Stratum.
DATASET NAME Samp4.
EXECUTE.
DATASET CLOSE Samp3.
DATASET CLOSE ClusNo.

* Attach the cluster-level sample information to every survey case (table lookup on InitCluster).
DATASET ACTIVATE Samp4.
SORT CASES BY InitCluster.

MATCH FILES
    /TABLE = Samp4
    /FILE = Main2
    /BY InitCluster.
EXECUTE.
DATASET NAME meg.
DATASET CLOSE Samp4.
DATASET CLOSE Main2.

* 4.4.2 Modify interview result.

* HH.
* A household coded 3 or 11 that has a value in LS515 is recoded to result 1.
* Any other code 11 becomes 3 when QHSTOP is filled in and 9 when QHSTOP is empty.

DO IF any( QHRESULT, 3, 11 ) AND NOT missing( LS515 ).
    COMPUTE QHRESULT = 1.
ELSE IF QHRESULT = 11 AND QHSTOP <> "".
    COMPUTE QHRESULT = 3.
ELSE IF QHRESULT = 11 AND QHSTOP = "".
    COMPUTE QHRESULT = 9.
END IF.

* RSI.
* An individual interview coded 11 that has a value in SF1111 is recoded to result 1.

IF QIRESULT = 11 AND NOT missing( SF1111 ) QIRESULT = 1.

* 4.4.3 Adjustment of non-response.

* HH.
* Outcome indicators (0/1) for the household interview, one per group of QHRESULT codes.

COMPUTE Hcomp = 0.
COMPUTE HnoResp = 0.
COMPUTE HnoExist = 0.
COMPUTE HnoDeter = 0.
DO IF any( QHRESULT, 1, 2 ).
    COMPUTE Hcomp = 1.
ELSE IF any( QHRESULT, 3, 6, 8 ).
    COMPUTE HnoResp = 1.
ELSE IF any( QHRESULT, 5, 7 ).
    COMPUTE HnoExist = 1.
ELSE IF QHRESULT = 9.
    COMPUTE HnoDeter = 1.
END IF.

* RSI.
* Outcome indicators (0/1) for the individual interview.
* Code 6 counts as non-response when QHRSIELIG is above 1 and as non-existent when QHRSIELIG is exactly 1.
* RnoDeter is initialised to 0 and no QIRESULT code sets it to 1 in this script.

COMPUTE Rcomp = 0.
COMPUTE RnoResp = 0.
COMPUTE RnoExist  = 0.
COMPUTE RnoDeter = 0.
DO IF any( QIRESULT, 1, 2 ).
    COMPUTE Rcomp = 1.
ELSE IF any( QIRESULT, 3, 4, 5, 10, 11 ).
    COMPUTE RnoResp = 1.
ELSE IF QIRESULT = 6 AND QHRSIELIG > 1.
    COMPUTE RnoResp = 1.
ELSE IF QIRESULT = 6 AND QHRSIELIG = 1.
    COMPUTE RnoExist = 1.
END IF.

* Household outcome totals per stratum and household type, added to every case.
AGGREGATE OUTFILE = * MODE = ADDVARIABLES
    /BREAK = Stratum QHTYPE
    /HHcomp = SUM( Hcomp )
    /HHnoResp = SUM( HnoResp )
    /HHnoExist = SUM( HnoExist )
    /HHnoDeter = SUM( HnoDeter ).

* RSI outcome totals per stratum, added to every case.
AGGREGATE OUTFILE = * MODE = ADDVARIABLES
    /BREAK = Stratum
    /RSIcomp = SUM( Rcomp )
    /RSInoResp = SUM( RnoResp )
    /RSInoExist = SUM( RnoExist )
    /RSInoDeter = SUM( RnoDeter ).

* Share of non-response among the cases with a known non-response or non-existent outcome.
* This share scales the not-determined count in the ratios below.
* NOTE(review): when the non-response and non-existent totals are both 0 the denominator is 0 - confirm the intended handling.
COMPUTE HHnoDeterCorr = HHnoResp / ( HHnoResp + HHnoExist ).
COMPUTE RSInoDeterCorr = RSInoResp / ( RSInoResp + RSInoExist ).

* Non-response correction ratio.
* It is the reciprocal of completed / ( completed + non-response + scaled not-determined ).

COMPUTE HHNoRespCorr = 1 / ( HHcomp / ( HHcomp + ( HHnoResp + HHnoDeter * HHnoDeterCorr ) ) ).
COMPUTE RSINoRespCorr = 1 / ( RSIcomp / ( RSIcomp + ( RSInoResp + RSInoDeter* RSInoDeterCorr ) ) ).

* 4.4.4 Weights.

* 4.4.4.1 Expansion weights.

* Pick the household probability that belongs to the household type of the case.
DO IF QHTYPE = 1.
    COMPUTE P_h = Ph_Nat.
ELSE IF QHTYPE = 2.
    COMPUTE P_h = Ph_Ref.
END IF.

* Household weight: _S is the inverse probability, _A adds the household and cluster non-response corrections.
COMPUTE HHexpweigh_S = 1 / P_h.
COMPUTE HHexpweigh_A = HHexpweigh_S * HHNoRespCorr *  ClusNRcorrector.

* RSI weight: _S is the adjusted household weight times the inverse of Prsi, _A adds the RSI non-response correction.
COMPUTE RSIexpweigh_S = HHexpweigh_A * ( 1 /  Prsi ).
COMPUTE RSIexpweigh_A = RSIexpweigh_S * RSINoRespCorr.

* Trim expansion weights.
* Weights above median + 2 standard deviations within QHGEOCODE1 are set to that limit.

* Household weights: cases with QHRESULT 3 to 9 get no weight.
COMPUTE HHexpweigh2 = HHexpweigh_A.
IF range( QHRESULT, 3, 9 ) HHexpweigh2 = $sysmis.

AGGREGATE OUTFILE = * MODE = ADDVARIABLES
    /BREAK = QHGEOCODE1
    /SDHHexpweigh = SD( HHexpweigh2 )
    /MEDHHexpweigh = MEDIAN( HHexpweigh2 ).

* The scratch variable holds the upper limit for the current case.
COMPUTE #capHH = MEDHHexpweigh + 2 * SDHHexpweigh.
COMPUTE HHexpweigh = HHexpweigh2.
IF HHexpweigh2 > #capHH HHexpweigh = #capHH.

DATASET ACTIVATE meg.

* RSI weights: cases with QIRESULT 3 to 11 or an adjusted weight of 0 get no weight.
COMPUTE RSIexpweigh2 = RSIexpweigh_A.
IF range( QIRESULT, 3, 11 ) OR RSIexpweigh_A = 0 RSIexpweigh2 = $sysmis.

AGGREGATE OUTFILE = * MODE = ADDVARIABLES
    /BREAK = QHGEOCODE1
    /SDRSIexpweigh = SD( RSIexpweigh2 )
    /MEDRSIexpweigh = MEDIAN( RSIexpweigh2 ).

COMPUTE #capRSI = MEDRSIexpweigh + 2 * SDRSIexpweigh.
COMPUTE RSIexpweigh = RSIexpweigh2.
IF RSIexpweigh2 > #capRSI RSIexpweigh = #capRSI.

* 4.4.4.2 Relative weights.

* Mean trimmed expansion weight within QHGEOCODE1, for household and RSI weights.
AGGREGATE OUTFILE = * MODE = ADDVARIABLES
    /BREAK = QHGEOCODE1
    /MeanHHexpweigh = MEAN( HHexpweigh )
    /MeanRSIexpweigh = MEAN( RSIexpweigh ).

* Relative weight = expansion weight divided by the mean expansion weight of the same QHGEOCODE1 group.
COMPUTE HHrelweigh = HHexpweigh / MeanHHexpweigh.
COMPUTE RSIrelweigh = RSIexpweigh / MeanRSIexpweigh.
EXECUTE.

* 4.5 Save weights.

* The initial cluster ID serves as the primary sampling unit.
compute PSU = InitCluster.

variable labels QHCLUSTER "Cluster ID in survey field work"
    /Stratum "Stratum"
    /PSU "Primary sampling unit for PPS cluster sampling"
    /HHexpweigh "Household expansion weights for PPS cluster sampling"
    /HHrelweigh "Household relative weights for PPS cluster sampling"
    /RSIexpweigh "RSI expansion weights for PPS cluster sampling"
    /RSIrelweigh "RSI relative weights for PPS cluster sampling".

* Write the case identifiers, stratum, PSU and the four weights; PSU and the weights get the suffix _PPS in the saved file.
save outfile "SampPath\Weight_ETH_PPS.sav" /keep QHCLUSTER QHID QIIDV Stratum PSU HHrelweigh RSIrelweigh HHexpweigh RSIexpweigh
    /rename ( PSU = PSU_PPS ) ( HHrelweigh = HHrelweigh_PPS ) ( RSIrelweigh =  RSIrelweigh_PPS ) ( HHexpweigh = HHexpweigh_PPS ) ( RSIexpweigh = RSIexpweigh_PPS ).

dataset close all.


