续上两篇:
https://bbs.pinggu.org/thread-6038753-1-1.html
https://bbs.pinggu.org/thread-6038766-1-1.html
# 得到初始种群以后,需要进行交叉、变异等遗传行为,选择部分个体进入下一代,逐代繁衍找到最优解
# 为让算法不会掉入局部最优解,除了变异,还会随机保留部分适应度低的个体,有可能携带优秀基因
# 交叉又叫基因重组,就是对参与交叉的两个染色体或个体的一个或多个等位基因互换
# needgs为给定的特征长度
# needgs=3
# 染色体交叉函数interCross
interCross <- function(individuals, p, dataName){
L <- nrow(individuals)
individuals.reserve=NULL
if(L%%2==1){
reserve_gene_id <- sample(1:L, 1)
individuals.reserve <- individuals[reserve_gene_id,]
individuals <- individuals[setdiff(1:L, reserve_gene_id),]
rownames(individuals)=NULL
L=L-1
}
individuals$group=sample(rep(1:(L/2),2),L)
individuals.cross=NULL
for(i in 1:(L/2)){
# 随机生成一个概率值,如果小于p,则进行交叉,否则保留原始个体进入下一代
rand_p=runif(1,0,1)
sub0=individuals[individuals$group==i,]
# 若发生交叉的两个个体基因型一样,那么随机生成一个与另外一个再交叉
t0=unlist(sub0[,1:needgs])
names(t0)=NULL
if(length(unique(t0))== needgs){
t0=genIndividuals(1,needgs,dataName)
t0$group=i
sub0[1,]=t0
}
sub0$group=NULL
if(rand_p<p){
individuals.cross <- rbind(individuals.cross, icross(sub0))
}else{
individuals.cross <- rbind(individuals.cross, sub0)
}
}
individuals.cross=data.frame(individuals.cross)
individuals.cross=rbind(individuals.cross, individuals.reserve)
rownames(individuals.cross)=NULL
return(individuals.cross[,1:needgs])
}
# 对染色体进行交叉操作
icross <- function(subpair){
subpair.all=NULL
subpair.all <- rbind(subpair, subpair[1,])
subpair.all <- rbind(subpair, subpair[2,])
# 从1到needgs的基因位中随机找1~(needgs/2)个,完成基因重组
geneLoc=sample(1:needgs,sample(1:(needgs/2),1))
subpair.one=subpair[1,]
subpair[1,geneLoc]=subpair[2,geneLoc]
subpair[2,geneLoc]=subpair.one[,geneLoc]
subpair.all <- rbind(subpair.all,subpair)
subpair.all=data.frame(subpair.all)
return(subpair.all)
}
# # 举例说明
# A <- c("g(Dadd,<x1>,<x2>)", "g(Dlog,<x1>)", "g(Dadd,<g(Dlog,<x2>)>,<x3>)")
# B <- c("g(sin,<x3>)", "g(Dadd,<g(sin,<x1>)>,<g(Dlog,<x2>)>)", "g(Dadd,<g(cos,<x4>)>,<x3>)")
#
# # AB基因二叉树
# par(mfrow=c(2,3))
# plotTree(A[1])
# title("个体A-基因1")
# plotTree(A[2])
# title("个体A-基因2")
# plotTree(A[3])
# title("个体A-基因3")
# plotTree(B[1])
# title("个体B-基因1")
# plotTree(B[2])
# title("个体B-基因2")
# plotTree(B[3])
# title("个体B-基因3")
# par(mfrow=c(1,1))
#
# # 使用interCross函数进行交叉操作
# indvs <- NULL
# indvs <- rbind(indvs,A)
# indvs <- rbind(indvs,B)
# indvs=data.frame(indvs,stringsAsFactors = F)
# out=interCross(indvs, p=0.85, 'vdata')[3:4,]
# # 交叉后的AB基因二叉树
# par(mfrow=c(2,3))
# plotTree(out[1,1])
# title("个体A-基因1")
# plotTree(out[1,2])
# title("个体A-基因2")
# plotTree(out[1,3])
# title("个体A-基因3")
# plotTree(out[2,1])
# title("个体B-基因1")
# plotTree(out[2,2])
# title("个体B-基因2")
# plotTree(out[2,3])
# title("个体B-基因3")
# par(mfrow=c(1,1))
#
# 变异是指基因突变,某个体的某个基因突然改变,mutat函数
mutat <- function(individuals, p , dataName){
addInd <- NULL
for(i in 1:nrow(individuals)){
if(runif(1,0,1)<p){
geneLoc=sample(1:needgs,1)
print(paste("第", i, "个个体<", geneLoc, "号基因>发生突变。", sep=""))
# 随机找一个基因位,并随机生成一个基因
bak=individuals[i,]
bak[,geneLoc]=randomGetTree(dataName,vfeatures)
addInd <- rbind(addInd, bak)
}
}
individuals <- rbind(individuals,addInd)
rownames(individuals)=NULL
return(individuals)
}
#
# # 其中vfeatures是原始数据下标属性,需初始化
# vfeatures <- 1:4
#
# # 举例说明基因突变,设置突变概率为0.9
# indvs <- NULL
# indvs <- rbind(indvs, A)
# indvs <- data.frame(indvs, stringsAsFactors = F)
# out=mutat(indvs, p=0.9, 'vdata')
# # 突变前后的A基因二叉树
# par(mfrow=c(2,3))
# plotTree(out[1,1])
# title("个体A-基因1(突变前)")
# plotTree(out[1,2])
# title("个体A-基因2(突变前)")
# plotTree(out[1,3])
# title("个体A-基因3(突变前)")
# plotTree(out[2,1])
# title("个体A-基因1(突变后)")
# plotTree(out[2,2])
# title("个体A-基因2(突变后)")
# plotTree(out[2,3])
# title("个体A-基因3(突变后)")
# par(mfrow=c(1,1))
# ============================ 实例分析 ================================
# 第一步,构造数据集
vdata <- matrix(data = c(7,1,11,11,7,11,3,1,2,21,1,11,10,
26,29,56,31,52,55,71,31,54,47,40,66,68,
6,15,8,8,6,9,17,22,18,4,23,9,8,
60,52,20,47,33,22,6,44,22,26,34,12,12,
1,1,0,0,0,0,0,0,0,1,0,0,1),
# 78.5,74.3,104.2,87.6,95.9,109.2,102.7,72.5,93.1,115.9,83.8,113.3,109.4),
nrow = 13, ncol = 5)
colnames(vdata) <- c("x1","x2","x3","x4","Y")
# 对x1-x4进行标准化处理
vdata[,1:4] <- scale(vdata[,1:4])
vdata <- as.data.frame(vdata)
# 第二步,计算原始特征进行交叉验证得到的误差平方和
stdErr=0
row_vdata <- nrow(vdata)
for(i in 1:row_vdata){
trainData = vdata[setdiff(1:row_vdata,i), ]
testData = vdata[i, ]
newfit <- lm(Y~., data = trainData)
testData$newPred <- predict(newfit, testData)
stdErr <- stdErr + sum(abs(testData$Y - testData$newPred)^2)
}
stdErr
# 设置原始数据的属性下标
vfeatures = 1:4
# 第三步,产生初始种群,假设初始种群规模为100
popSize = 100
# 设置特征长度为3
needgs = 3
individuals = genIndividuals(popSize, needgs)
# 第四步,设置最大迭代次数为100,并设置终止条件为种群中最高适应度除以平均适应度的alpha不超过1.001
alphaV <- NULL
maxVal <- NULL
ageId <- NULL
for(i in 1:100)
{
ageId <- c(ageId,i)
# 交叉,先保留父代
inter.obj <- interCross(individuals, p=0.85, 'vdata')
# 变异。保留变异前的个体
mutat.obj <- mutat(inter.obj, p=0.05, 'vdata')
# 计算适应度,并选择
adjusts <- NULL
for(k in 1:nrow(mutat.obj))
{
adjusts=c(adjusts, getAdjust(unlist(mutat.obj[k, ])))
}
mutat.obj$adjusts=adjusts
# 按adjusts排序,取前0.4*nrow(individuals)个个体进行返回
# 0.6*nrow(individuals)个随机从剩余的个体的选取
individuals.ready=mutat.obj[order(mutat.obj$adjusts, decreasing = T), ]
topN=(round(popSize*0.4))
individuals=individuals.ready[1:topN,]
individuals=rbind(individuals,
individuals.ready[sample((topN+1):length(adjusts), popSize - topN), ])
adjusts=individuals$adjusts
alpha=max(adjusts)/mean(adjusts)
alphaV <- c(alphaV,alpha)
maxVal <- c(maxVal, max(individuals$adjusts))
if(mean(adjusts>0 && alpha<1.6))
{
print(paste("进化终止,算法已收敛!共进化", i, "代!"))
break;
}
}
# 第五步:提取最佳特征组合,构建新数据集,同时绘制出各特征表达式的二叉树
# 取individuals中的第一个个体,并生成特征
treeExpArray=individuals[1,1:needgs]
tempData=NULL
for(treeExp in treeExpArray)
{
feature=eval(parse(text=gsub('>', '', gsub('<', '', treeExp))))
if(is.na(sd(feature)) || is.nan(sd(feature)) || sd(feature)==0)
{
feature=rep(0,NROW(feature))
}
tempData <- cbind(tempData,feature)
}
colnames(tempData)=paste("X", 1:needgs,sep="")
tempData=data.frame(tempData)
tempData$Y=vdata$Y
head(tempData)
par(mfrow=c(1,3))
plotTree(treeExpArray[1,1])
title("特征-X1")
plotTree(treeExpArray[1,2])
title("特征-X2")
plotTree(treeExpArray[1,3])
title("特征-X3")
par(mfrow=c(1,1))
完结,欢迎大家指正讨论!