做的是性别工资差异的分布分解,数据文件中用到 rwage、schooling、year、pe、pesq 这几个变量。
* Step 1: coefficients and group means for the first two decomposition terms.
* Only the M==1 group (labelled "m" below, presumably male) is regressed;
* the M==0 group (labelled "f") enters through its means only.
use F:\jmp.dta, clear

* Wage equation for M==1 in each year. Copy the coefficients to Excel:
*   1988 -> b0m
*   2004 -> b1m
foreach yr in 1988 2004 {
    regress rwage schooling pe pesq if M==1 & year==`yr'
}

* Summary statistics by year and group. Copy the means to Excel, in this order:
*   1988, M==1 -> x0m
*   1988, M==0 -> x0f
*   2004, M==1 -> x1m
*   2004, M==0 -> x1f
foreach yr in 1988 2004 {
    foreach grp in 1 0 {
        summarize rwage schooling pe pesq if M==`grp' & year==`yr'
    }
}

* With x0m x0f x1m x1f and b0m b1m in hand, apply the decomposition formula
* to compute the first two components and enter both values in table 8.1.
* Base year (1988): fit the wage equation on the M==1 group, then store
* residuals for both groups evaluated at those M==1 coefficients.
use F:\jmp.dta, clear
* One filter instead of two: 1988 rows with a non-missing wage
keep if year==1988 & rwage!=.
regress rwage schooling pe pesq if M==1
* resid88m: residuals for the estimation sample only
predict resid88m if e(sample), residuals
* resid88f: residuals of the M==0 group under the M==1 coefficients
predict resid88f if M==0, residuals
save beginyear.dta, replace
* Comparison year (2004): same procedure as for the base year. Fit the wage
* equation on the M==1 group and store residuals for both groups evaluated
* at those coefficients.
use F:\jmp.dta, clear
* One filter instead of two: 2004 rows with a non-missing wage
keep if rwage!=. & year==2004
regress rwage schooling pe pesq if M==1
* resid04m: residuals for the estimation sample only
predict resid04m if e(sample), residuals
* resid04f: residuals of the M==0 group under the M==1 coefficients
predict resid04f if M==0, residuals
save endyear.dta, replace
上面的部分比较简单,能看懂;下面的部分就不懂了……
* Place each M==0 (female) base-year residual on the base-year M==1 (male)
* residual distribution and record its percentile bin in the variable rank:
*   rank = 1      if fearn <  p1
*   rank = j + 1  if p(j) <= fearn < p(j+1),  j = 1..98
*   rank = 100    if fearn >= p99
* where p1..p99 are the percentiles of the male residuals.
use beginyear.dta, clear
gen mearn = resid88m if M==1
gen fearn = resid88f if M==0
#delimit cr

* z1..z99 hold the 1st..99th percentiles of mearn, repeated on every row
forvalues p = 1/99 {
    egen z`p' = pctile(mearn) if rwage~=., p(`p')
}

gen rank = 1 if fearn < z1
forvalues lo = 1/98 {
    local hi = `lo' + 1
    replace rank = `hi' if fearn >= z`lo' & fearn < z`hi'
}

* Stata orders missing values above every number, so fearn >= z99 is also
* true when fearn is missing (for example an M==0 row with a missing
* regressor, for which predict returns missing). Without the explicit
* missing() guard such rows would be pushed into the top percentile and
* later receive the 100th-bin comparison-year residual.
replace rank = 100 if fearn >= z99 & M==0 & !missing(fearn)

drop z1-z99
sort rank
save beginyear.dta, replace
* Build a lookup table (eym.dta) of comparison-year M==1 residuals by
* percentile bin: one row per value of rank, with res equal to the mean
* 2004 residual of the observations falling in that bin. It is merged onto
* the base-year data by rank in the next step.
use endyear, clear
keep if M==1
#delimit cr
keep if rwage~=.

* Empirical percentile position of each residual: (rank - 0.5) / N,
* rounded to two decimals and rescaled to an integer bin number
egen nobs = count(resid04m)
egen pos = rank(resid04m)
gen pctpos = (pos - 0.5) / nobs
gen pctpos2 = round(pctpos, 0.01)
gen rank = pctpos2*100
replace rank = round(rank, 1)

* Only the bin number and the residual are needed from here on
rename resid04m res
keep rank res
drop if rank==.

* Mean residual per bin; collapse returns the data ordered by rank
collapse (mean) res, by(rank)
sort rank
save eym.dta, replace
* Attach the comparison-year residual of the same percentile bin (res, from
* eym.dta) to every base-year observation.
*   - M==0 rows already carry rank from the percentile-placement step.
*   - M==1 rows get rank here from their own position in the base-year
*     M==1 residual distribution.
use beginyear.dta, clear

* Percentile position of each M==1 residual: (rank - 0.5) / N, rounded to
* two decimals and rescaled to an integer bin number
egen n = count(resid88m)
egen i = rank(resid88m)
generate cen = (i - 0.5) / n
generate ce = round(cen, 0.01)
generate rank2 = ce*100
replace rank = round(rank2, 1) if M==1

* Old-style match merge on rank; both files must be sorted by rank
sort rank
merge rank using eym.dta
drop _merge
* Rows that exist only in eym.dta have no M value; discard them
drop if M==.
save beginyear.dta, replace
* All steps are done; the means reported below are the terms of the
* decomposition equation. The labels in the trailing comments are the ones
* used in the original notes (presumably: first tag = year and group of the
* percentile position, second tag = year and group of the residual
* distribution -- confirm against the decomposition formula).

* Base-year file with the merged comparison-year residuals
use beginyear.dta, clear
summarize res if M==1          /* this one is 0m 1m */
summarize res if M==0          /* this one is 0f 1m */
summarize resid88m resid88f    /* these two are 0m 0m and 0f 0m */

* Comparison-year file
use endyear, clear
summarize resid04m resid04f    /* these two are 1m 1m and 1f 1m */

* Following the decomposition formula, compute the values of the last two
* components and put the two values into table 8.1.
/* Then calculate the percentage of each component's contribution to the
   change in the overall differential over the two years. */
** end