受访者驱动抽样(RDS)是一种专门针对隐匿人群的抽样方法,常用于跨性别者、暗娼、MSM等因耻辱感和法律制度的约束难以识别和接触的人群,并逐渐应用于一般人群。这里给出国外学者写的一个相关R程序:
基本参数设定:
n <- 20000 #pop size
ns <- 500 # rds sample size
seeds <- 7 # #of seeds
#d <- round(rlnorm(n,meanlog = 1)) + 1
d <- round(rexp(n,1/5)) + 1 #heavy tailed degrees
#d <- rpois(n,lambda = 5) + 1 #light tail
#d <- rep(10, n)
g <- rep(1,n)
首先构造邻接矩阵的函数:
makeGraph <- function(d){
 n <- length(d)
 edges <- list()
 d1 <- d
 while(sum(d1)>0.5){
  l <- sample.int(n,1,prob=d1)
  dt <- d1
  dt[l] <- d1[l] - 1
  if(sum(dt) > 0.5){
   k <- sample.int(n,1,prob=dt)
   v <- TRUE
   d1[k] <- d1[k] - 1
  }else{
   k <- sample.int(n,1,prob=d)
   v <- FALSE
  }
  d1[l] <- d1[l] - 1
  edges[[length(edges)+1]] <- c(l,k,v)
 }
 el <- do.call(rbind,edges)
 el
}
el <- makeGraph(d)   #el即为相关联的矩阵
构造RDS抽样函数:
sampRDS <- function(el, d, ns, g, ss, biased=TRUE,pr = c(0,.1,.9)){
 maxR <- length(pr) -1
 n <- length(g)
 ml <- if(is.factor(g)) max(levels(g)) else max(g)
 if(biased){
  seeds <- sample.int(n,ns,prob=as.numeric(as.factor(g))-1)
 }else{
  seeds <- sample.int(n,ns, prob = d)
 }
 samp <- seeds
 recr <- rep(-1,ns)
 time <- 0 + (1:ns)/10000000
 rcTime <- rexp(ns)
 v <- rep(-1,ns)
 t1 <- time
 while(length(samp) < ss){
  subjIndex <- which.min(t1 + rcTime)
  if(length(subjIndex)==0){
   print("redraw")
   subj <- sample( (1:n)[-samp],1)
   samp <- c(samp,subj)
   recr <- c(recr,-1)
   time <- c(time,max(time+1))
   rcTime <- c(rcTime,rexp(1))
   t1 <- c(t1,max(time+1))
   v <- c(v,-1)
  }else{
   t <- t1[subjIndex] + rcTime[subjIndex]
   t1[subjIndex] <- NA
   subj <- samp[subjIndex]
   nr <- sample(0:maxR,1,replace=FALSE,prob=pr)
   nbrs <- rbind(el[el[,1]==subj,2:3,drop=FALSE],
                 el[el[,2]==subj & el[,3]>0.5,c(1,3),drop=FALSE]
   )
   nbrs <- nbrs[!(nbrs[,1] %in% samp) & nbrs[,1]!=subj,,drop=FALSE]
   nr <- min(nr,nrow(nbrs))
   if(nr>0){
    s <- sample.int(nrow(nbrs),nr,replace=FALSE)
    s <- s[!duplicated(nbrs[s,1])]
    nr <- length(s)
    samp <- c(samp,nbrs[s,1])
    recr <- c(recr,rep(subj,nr))
    tm <- t + t + (0:(nr-1)) / 1000000
    time <- c(time,tm)
    t1 <- c(t1,tm)
    rcTime <- c(rcTime,rexp(nr))
    v <- c(v,nbrs[s,2])
   }
  }
 }
data.frame(subject=samp,recruiter=recr,time=time,v=v)
}
rds <- sampRDS(el, d, seeds,g,ns,FALSE, pr = c(0,.1,.9))
基本函数如上。