# Deduplicate the links stored in the first column of each worksheet.
#
# For every sheet of the input workbook this script
#   1. keeps the first-column entries longer than `min_link_chars` characters,
#   2. cuts each entry at its first "?",
#   3. appends to `report_path` the entries duplicated inside the sheet and
#      the entries that already appeared on an earlier sheet,
#   4. writes the remaining entries to a sheet of the same name in
#      `output_path`.

# install.packages() does not attach the package, so load it explicitly
# afterwards; library() also fails loudly if the install did not succeed.
if (!requireNamespace("xlsx", quietly = TRUE)) {
  install.packages("xlsx")
}
library(xlsx)

# Forward slashes: in an R string literal a backslash starts an escape
# sequence, so "C:\..." does not parse as a Windows path.
input_path <- "C:/收集链接.xls"
report_path <- "D:/重复.txt"
output_path <- "D:/去重复.xls"
sheet_count <- 12L
min_link_chars <- 40L

# Append one line to the report. cat() separates its arguments with a space,
# so each line ends with " \n".
report_line <- function(text) {
  cat(text, "\n", file = report_path, append = TRUE)
}

# Append the given entries to the report, one per line, without headers.
report_items <- function(items) {
  write.table(
    items,
    file = report_path,
    row.names = FALSE,
    col.names = FALSE,
    append = TRUE
  )
}

# Entries already written to the output workbook by earlier sheets (unique).
seen_links <- character(0)

for (i in seq_len(sheet_count)) {
  sheet <- read.xlsx(file = input_path, sheetIndex = i, encoding = "UTF-8")

  # Guard against a sheet that yields no data at all.
  if (is.null(sheet) || ncol(sheet) == 0L) {
    raw_links <- character(0)
  } else {
    raw_links <- as.character(sheet[[1]])
  }

  # Drop empty cells explicitly; otherwise NA would leak through the length
  # filter into the duplicate checks.
  long_links <- raw_links[
    !is.na(raw_links) & nchar(raw_links) > min_link_chars
  ]

  # Keep only the part before the first "?".
  links <- vapply(
    strsplit(long_links, "?", fixed = TRUE),
    function(parts) parts[1],
    character(1),
    USE.NAMES = FALSE
  )

  # Duplicates inside the current sheet.
  dup_in_sheet <- duplicated(links)
  if (!any(dup_in_sheet)) {
    report_line(paste0("sheet", i, "内没有重复项!"))
  } else {
    report_line(paste0("sheet", i, "内有重复项:"))
    report_items(links[dup_in_sheet])
  }
  sheet_links <- links[!dup_in_sheet]

  # Duplicates against earlier sheets. Both vectors are already unique, so a
  # membership test replaces slicing duplicated() of the combined vector,
  # which went wrong when a sheet contributed no entries.
  seen_before <- sheet_links %in% seen_links
  if (!any(seen_before)) {
    report_line(paste0("sheet", i, "与前面无重复项!"))
  } else {
    report_line(paste0("sheet", i, "与前面有重复项:"))
    report_items(sheet_links[seen_before])
  }

  # Not named `filter`: that name would fall back to stats::filter whenever
  # the assignment is skipped.
  new_links <- sheet_links[!seen_before]

  # NOTE(review): append = TRUE adds to an existing workbook; re-running
  # against an old output file presumably clashes on sheet names - confirm.
  write.xlsx(
    new_links,
    file = output_path,
    sheetName = paste0("sheet", i),
    append = TRUE
  )

  seen_links <- c(seen_links, new_links)
}