方法分享：R语言乱码怎么办

12773

收藏 2014-10-09

#抓取信息

library(RCurl)
library(XML)

# Spoofed browser request headers, passed to getURL() as `httpheader`.
# A named character vector: names are HTTP header fields, values are the
# header contents.
myheader <- c(
  # Complete Firefox 3.5.6 user-agent string. The previous value was cut off
  # after "rv:1.9.1.6", leaving an unclosed parenthesis and no product tokens.
  "User-Agent" = paste0(
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.1.6) ",
    "Gecko/20091201 Firefox/3.5.6"
  ),
  # "text/html" was misspelled as "text/htmal", so HTML was never listed
  # explicitly among the accepted media types.
  "Accept" = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
  "Accept-Language" = "en-us",
  "Connection" = "keep-alive",
  "Accept-Charset" = "GB2312,utf-8;q=0.7,*;q=0.7"
)

# Test reading the information from a single URL.

testurl <- "http://www.fruitday.com/web/product_list/40"
# `.encoding` (with the leading dot) is getURL()'s argument for declaring the
# character set of the response body. A bare `encoding` is not that argument:
# it falls into `...` and is handed to libcurl as a curl option, so the
# response text was never declared as UTF-8.
temp <- getURL(testurl, httpheader = myheader, .encoding = "UTF-8")
# Parse the downloaded page. `asText = TRUE` states that `temp` is HTML
# content rather than a file name or URL; `encoding` tells the parser how to
# decode it so non-ASCII text is not garbled.
k <- htmlParse(temp, asText = TRUE, encoding = "UTF-8")
# Dump the parsed document for inspection. write.table() cannot be used here:
# an XML document is not coercible to a data frame.
saveXML(k, file = "k.txt", encoding = "UTF-8")
# k
# Select the nodes for each field by the CSS class of the enclosing <li>.
name.node <- getNodeSet(k, '//li[@class="cplist-p02"]/a')
style.node <- getNodeSet(k, '//li[@class="cplist-p03"]')
price.node <- getNodeSet(k, '//li[@class="cplist-p04"]')
# Print the matched node sets for a quick visual check.
name.node
style.node
price.node
#price<-xmlValue(price.node1[[1]])
#price
#name.node
#price<-sapply(getNodeSet(k,'//p[@class="price"]/i/text('),xmlValue)
#url.node1
#name<-xmlGetAttr(url.node[[1]],'title')
#name
#name<-xmlGetAttr(url.node1[[1]],'title')
#name
#getNodeSet(k,'//p[@class="title"]/a[@title]')
# Extract the text of every matched node, one string per node.
# The previous loop ran a fixed 200 iterations and had lost its subscripts
# (`name.node[]` instead of `name.node[[i]]`), so it passed the whole node set
# to xmlValue() each time and never collected per-item values. Iterating over
# the node sets themselves works for any number of items, including zero.
node_text <- function(nodes) {
  vapply(
    nodes,
    function(node) {
      value <- xmlValue(node)
      # Guard against a node yielding no value so the column keeps one entry
      # per node.
      if (length(value) == 1L) value else NA_character_
    },
    character(1)
  )
}

name <- node_text(name.node)
style <- node_text(style.node)
price <- node_text(price.node)

# The three columns must line up row by row; fail with a clear message rather
# than the generic data.frame() error when the page structure differs.
counts <- c(name = length(name), style = length(style), price = length(price))
if (length(unique(counts)) != 1L) {
  stop(
    "Node counts differ (",
    paste(names(counts), counts, sep = " = ", collapse = ", "),
    "); cannot combine them into one table.",
    call. = FALSE
  )
}

# Convert every text column from UTF-8 to GBK so Chinese text displays
# correctly. Previously only `name` was converted, which left the output file
# with mixed encodings.
name <- iconv(name, "UTF-8", "gbk")
style <- iconv(style, "UTF-8", "gbk")
price <- iconv(price, "UTF-8", "gbk")

df <- data.frame(na = name, sty = style, pr = price)
write.table(df, "tt.txt")