Here is an example to illustrate the SUR is more efficient when errors are correlated.
For more information about SUR at the link below,
http://en.wikipedia.org/wiki/Seemingly_unrelated_regressions
%let rho=0.5;
%let size=1000;
data binormal;
rho=ρ
a1=sqrt((1+rho)/2);
a2=sqrt((1-rho)/2);
do i=1 to &size;
rd1=rannor(12390);
rd2=rannor(12390);
e1=a1*rd1+a2*rd2;
e2=a1*rd1-a2*rd2;
output;
end;
keep e1 e2;
run;
data tmp;
set binormal;
x1=rannor(123);
x2=rannor(123);
y1=1+1*x1+e1;
y2=2+0.5*x2+e2;
run;
proc corr data=tmp;
var y1 y2 e1 e2;
run;
proc model data=tmp;
parms a b c d=0;
y1=a+b*x1;
y2=c+d*x2;
fit y1 y2/ols sur fiml;
run;
quit;
proc nlmixed data=tmp;
parms a b c d=0 rho=0.3 s1=1 s2=1;
bounds 0<rho<1;
bounds s1 s2>0;
xbeta1= a+b*x1;
xbeta2= c+d*x2;
u1=(y1-xbeta1);
u2=(y2-xbeta2);
p=( 1/( 2*3.14159*s1*s2*sqrt(1-rho**2) ) ) * exp( -( (u1/s1)**2+(u2/s2)**2 -2*rho*(u1/s1)*(u2/s2)) /(2*(1-rho**2)) );
p=max(min(p,1-1e-20),1e-20);
ll=log(p);
model y1 ~ general(ll);
run;