Stata有一些导出统计、回归结果的工具,比如outreg2、asdoc、esttab,但这几个工具对中文的支持都不好,导出的rtf文件里的中文会变成乱码。这是由于它们直接以UTF-8编码写出中文,而rtf标准只接受7位ASCII文本,非ASCII字符必须写成\uN之类的转义形式,所以Word认不出这些UTF-8字节。
我折腾了一下,写了个小工具把rtf文件的UTF-8转成Word能够认识的Unicode编码。用法很简单,把这个脚本保存为一个.vbs文件,然后把Stata导出的rtf文件拖到这个vbs文件的图标上,它就会生成一个加了后缀“_c”的rtf文件,乱码消失不见。理论上这个小工具也能解决日文、韩文等其它多字节编码问题。
这段代码是匆匆写就的,效率低且丑,欢迎修改。
'Purpose: Encode rtf file containing Multi-byte-char into Unicode format, which is readable in Word.
' A new rtf file with suffix _c will be generated in the same directory as the original file.
'Usage: Drag and drop your RTF file onto this script. Windows XP or above is required.
'Contact: wolfccb.com
' Entry point. Takes the first file dropped onto the script, runs its text
' through Multi_Encode and convert, and saves the result beside the input as
' "<base name>_c.<extension>". The input file is never modified.
Set objArgs = WScript.Arguments
If objArgs.Count = 0 Then
    MsgBox "Please drag and drop your RTF file onto this script."
    WScript.Quit
End If
filename = objArgs(0)
Set fso = CreateObject("Scripting.FileSystemObject")

' 1 = ForReading, False = do not create the file if it is missing.
Set f = fso.OpenTextFile(filename, 1, False)
If f.AtEndOfStream Then
    ' ReadAll raises "Input past end of file" on an empty file.
    txt = ""
Else
    txt = f.ReadAll()
End If
f.Close

txt = Multi_Encode(txt)
txt = convert(txt)

' Derive the output path from the path components instead of a textual
' search for ".rtf": that search is case-sensitive and touches every
' occurrence in the path, so "X.RTF" (or a file with another extension)
' would map onto itself and be overwritten, and a folder whose name contains
' ".rtf" would be rewritten as well.
outName = fso.GetBaseName(filename) & "_c"
ext = fso.GetExtensionName(filename)
If ext <> "" Then
    outName = outName & "." & ext
End If
folderPath = fso.GetParentFolderName(filename)
If folderPath = "" Then
    ' Bare file name: keep the output relative to the current directory.
    fileout = outName
Else
    fileout = fso.BuildPath(folderPath, outName)
End If

' 2 = ForWriting, True = create the output file when it does not exist.
Set f = fso.OpenTextFile(fileout, 2, True)
f.Write txt
f.Close

Set f = Nothing
Set fso = Nothing
Set objArgs = Nothing
' Replaces every percent-encoded three-byte UTF-8 sequence in s
' ("%Ex%xx%xx", as produced by Multi_Encode) with the text returned by
' DecodeUTF8 for that sequence, and returns the resulting string.
'
' Only well-formed sequences are touched: an "%E" lead byte followed by two
' continuation bytes (%80-%BF), all written with upper-case hex digits.
' Anything else that merely starts with "%E" is left exactly as it is, so a
' stray "%E" in the document text can no longer make DecodeUTF8 fail on
' non-hex characters.
'
' s   : text to scan (not modified).
' Returns the converted text.
Function convert(s)
    Dim re, matches, m, pieces, n, pos

    Set re = New RegExp
    re.Pattern = "%E[0-9A-F]%[89AB][0-9A-F]%[89AB][0-9A-F]"
    re.Global = True
    re.IgnoreCase = False

    Set matches = re.Execute(s)
    If matches.Count = 0 Then
        convert = s
        Exit Function
    End If

    ' One slot for the text before each match, one for each decoded match,
    ' plus one for the tail after the last match. Collecting the pieces and
    ' joining once keeps the whole conversion a single pass over s.
    ReDim pieces(matches.Count * 2)
    n = 0
    pos = 1
    For Each m In matches
        ' FirstIndex is zero-based; Mid positions are one-based.
        pieces(n) = Mid(s, pos, m.FirstIndex + 1 - pos)
        pieces(n + 1) = DecodeUTF8(m.Value)
        n = n + 2
        pos = m.FirstIndex + 1 + m.Length
    Next
    pieces(n) = Mid(s, pos)

    convert = Join(pieces, "")
End Function
' Percent-encodes every character of str whose Asc value is negative and
' returns the resulting string; all other characters are copied unchanged.
'
' For such a character the Asc value is written as hex and split into its
' first two and last two hex digits:
'   - last byte >= 127 : "%HH%LL"
'   - last byte <  127 : "%HH" followed by the literal character Chr(LL)
' Finally every "%5C" in the result is replaced by a backslash.
'
' NOTE(review): this relies on Asc returning a negative value for
' double-byte characters, which presumably only happens when the system
' ANSI code page is a double-byte one (e.g. Chinese/Japanese/Korean
' Windows) - confirm before relying on it elsewhere.
'
' str : text to encode.
' Returns the encoded text ("" for empty input).
Function Multi_Encode(ByVal str)
    Dim i, total, ch, ansi, hexCode, highHex, lowHex, lowByte, parts

    total = Len(str)
    If total = 0 Then
        Multi_Encode = ""
        Exit Function
    End If

    ' Collect one output piece per input character and join once at the end;
    ' appending to the result inside the loop is quadratic on large files.
    ReDim parts(total - 1)
    For i = 1 To total
        ch = Mid(str, i, 1)
        ansi = Asc(ch)
        If ansi < 0 Then
            hexCode = Hex(ansi)
            highHex = Left(hexCode, 2)
            lowHex = Right(hexCode, 2)
            lowByte = CByte("&H" & lowHex)
            If lowByte < 127 Then
                ' The low byte is emitted as the plain character it stands for.
                parts(i - 1) = "%" & highHex & Chr(lowByte)
            Else
                parts(i - 1) = "%" & highHex & "%" & lowHex
            End If
        Else
            parts(i - 1) = ch
        End If
    Next

    ' NOTE(review): the loop above does not appear to emit "%5C" itself (a low
    ' byte of 5C takes the Chr branch), so this looks like it only affects a
    ' literal "%5C" already present in str - kept for compatibility, confirm
    ' whether it is still wanted.
    Multi_Encode = Replace(Join(parts, ""), "%5C", "\")
End Function
' Decodes a string of percent-encoded UTF-8 bytes ("%E4%B8%AD") and returns
' the characters as RTF Unicode escapes ("\u20013?").
'
' Each escape has the form \uN? where N is the UTF-16 code unit written as a
' signed 16-bit decimal (units above 32767 become negative) and "?" is the
' fallback character for readers that ignore \u. One- to four-byte UTF-8
' sequences are accepted; a four-byte sequence yields two escapes (a
' surrogate pair). Code points in the two-byte range are computed directly
' rather than through Chr, which would look the number up in the ANSI code
' page instead of treating it as a Unicode code point.
'
' Any "%XX" group that does not start a complete, well-formed sequence (bad
' hex, missing or invalid continuation byte, truncated input) is copied to
' the result unchanged instead of raising an error or being dropped.
'
' sInput : text made of "%XX" groups.
' Returns the RTF escape text ("" for empty input).
Function DecodeUTF8(sInput)
    Dim pos, total, lead, extra, cp, k, cont, valid

    DecodeUTF8 = ""
    total = Len(sInput)
    pos = 1
    Do While pos <= total
        ' Classify the lead byte: extra = number of continuation bytes that
        ' must follow, cp = payload bits taken from the lead byte.
        lead = DecodeUTF8_ByteAt(sInput, pos)
        If lead < 0 Then
            extra = -1
        ElseIf lead < 128 Then
            extra = 0
            cp = lead
        ElseIf (lead And 224) = 192 Then
            extra = 1
            cp = lead And 31
        ElseIf (lead And 240) = 224 Then
            extra = 2
            cp = lead And 15
        ElseIf (lead And 248) = 240 Then
            extra = 3
            cp = lead And 7
        Else
            extra = -1
        End If

        ' Fold in the continuation bytes (10xxxxxx), six bits each.
        valid = (extra >= 0)
        k = 1
        Do While valid And k <= extra
            cont = DecodeUTF8_ByteAt(sInput, pos + 3 * k)
            If cont < 0 Then
                valid = False
            ElseIf (cont And 192) <> 128 Then
                valid = False
            Else
                cp = cp * 64 + (cont And 63)
                k = k + 1
            End If
        Loop
        If valid Then
            ' 1114111 = U+10FFFF, the last Unicode code point.
            If cp > 1114111 Then valid = False
        End If

        If valid Then
            If cp >= 65536 Then
                ' Outside the BMP: split into a UTF-16 surrogate pair
                ' (55296 = &HD800, 56320 = &HDC00).
                cp = cp - 65536
                DecodeUTF8 = DecodeUTF8 & DecodeUTF8_RtfUnit(55296 + (cp \ 1024)) _
                                        & DecodeUTF8_RtfUnit(56320 + (cp Mod 1024))
            Else
                DecodeUTF8 = DecodeUTF8 & DecodeUTF8_RtfUnit(cp)
            End If
            pos = pos + 3 * (extra + 1)
        Else
            ' Not decodable: keep the raw text and move on to the next group.
            DecodeUTF8 = DecodeUTF8 & Mid(sInput, pos, 3)
            pos = pos + 3
        End If
    Loop
End Function

' Helper for DecodeUTF8: returns the value (0-255) of the "%XX" group that
' starts at position pos of s, or -1 when there is no valid group there.
Function DecodeUTF8_ByteAt(s, ByVal pos)
    Const HEX_DIGITS = "0123456789ABCDEFabcdef"
    Dim tok

    DecodeUTF8_ByteAt = -1
    tok = Mid(s, pos, 3)
    If Len(tok) <> 3 Then Exit Function
    If Left(tok, 1) <> "%" Then Exit Function
    If InStr(HEX_DIGITS, Mid(tok, 2, 1)) = 0 Then Exit Function
    If InStr(HEX_DIGITS, Mid(tok, 3, 1)) = 0 Then Exit Function
    DecodeUTF8_ByteAt = CLng("&H" & Mid(tok, 2, 2))
End Function

' Helper for DecodeUTF8: formats one UTF-16 code unit (0-65535) as an RTF
' \uN? escape with N written as a signed 16-bit decimal.
Function DecodeUTF8_RtfUnit(ByVal unit)
    If unit > 32767 Then unit = unit - 65536
    DecodeUTF8_RtfUnit = "\u" & CStr(unit) & "?"
End Function