抽样问题 - 经管之家

ywh19860616

19224

收藏 2010-10-16

用R使用了如下命令：
> y <- rnorm(50)
> x <- matrix(rnorm(100),50)
> fit <- rq(y~x,tau = .4)
Call:
rq(formula = y ~ x, tau = 0.4)
Coefficients:
(Intercept)       x1       x2
-0.38203826  0.09384216  0.09347414
Degrees of freedom: 50 total; 47 residual
Call: rq(formula = y ~ x, tau = 0.4)
tau: [1] 0.4
Coefficients:
         Value Std. Error t value  Pr(>|t|)
(Intercept) -0.38204  0.21700 -1.76056  0.08482
x1          0.09384  0.18203 0.51552  0.60861
x2          0.09347  0.24560 0.38059  0.70522

但是我现在想做的数据结构不是上面的一列数据格式（简称为截面），我现在想做面板（问一句，对于bootstrap，不同数据类型会不同吗，如面板数据和截面数据？），计算系数的程序已经
基本上有了，就是R中quantreg的rq.fit.sfn。但是这个程序只运行出coeff，而t值和p值都没有，所以我想用bootstrap计算（使用block bootstrap即可）
应该怎么实现？

> summary.rq
# Printed source of the summary method for "rq" (quantile regression) fits.
# It builds a coefficient table for `object` using the standard-error method
# named by `se`, one of "rank", "iid", "nid", "ker" or "boot".
#
# Arguments:
#   object      fitted rq object; its terms, model frame, tau, residuals and
#               coefficients are read below.
#   se          standard-error method. Although the signature default is
#               "nid", when `se` is not supplied the code picks "rank" for
#               n < 1001 with covariance == FALSE, and "nid" otherwise.
#   covariance  if TRUE, the covariance matrix (and method-specific pieces)
#               are attached to the result.
#   hs          forwarded to bandwidth.rq().
#   ...         forwarded to rq.fit.br() ("rank") or boot.rq() ("boot").
#
# Returns a list of class "summary.rq" holding call, terms, coefficients,
# rdf and tau, plus cov/scale/Hinv/J/B when covariance == TRUE.
function (object, se = "nid", covariance = FALSE, hs = TRUE,
...)
{
if (object$method == "lasso")
      stop("no inference for lasso'd rq fitting: try rqss (if brave)")
# Rebuild response, design matrix and weights from the stored model.
mt <- terms(object)
m <- model.frame(object)
y <- model.response(m)
x <- model.matrix(mt, m, contrasts = object$contrasts)
wt <- model.weights(object$model)
tau <- object$tau
# Tolerance used to count "zero" residuals and to guard a division below.
eps <- .Machine$double.eps^(2/3)
coef <- coefficients(object)
# A matrix of coefficients is reduced to its first column.
if (is.matrix(coef))
      coef <- coef[, 1]
vnames <- dimnames(x)[[2]]
resid <- object$residuals
n <- length(resid)
p <- length(coef)
rdf <- n - p
# With weights present, residuals, design and response are all rescaled.
if (!is.null(wt)) {
      resid <- resid * wt
      x <- x * wt
      y <- y * wt
}
# Automatic choice of method only when the caller did not pass `se`.
if (missing(se)) {
      if (n < 1001 & covariance == FALSE)
         se <- "rank"
      else se <- "nid"
}
# "rank": refit with rq.fit.br(ci = TRUE); its $coef is used as the table.
if (se == "rank") {
      f <- rq.fit.br(x, y, tau = tau, ci = TRUE, ...)
}
if (se == "iid") {
      # xxinv = R^{-1} R^{-T} from the QR factor of x, i.e. the inverse of
      # crossprod(x).
      xxinv <- diag(p)
      xxinv <- backsolve(qr(x)$qr[1:p, 1:p, drop = FALSE],
         xxinv)
      xxinv <- xxinv %*% t(xxinv)
      # Skip the pz residuals that are numerically zero, then take the next
      # h + 1 smallest-in-magnitude residuals.
      pz <- sum(abs(resid) < eps)
      h <- max(p + 1, ceiling(n * bandwidth.rq(tau, n, hs = hs)))
      ir <- (pz + 1):(h + pz + 1)
      ord.resid <- sort(resid[order(abs(resid))][ir])
      xt <- ir/(n - p)
      # Sparsity is the slope of an rq fit of those ordered residuals on xt.
      sparsity <- rq(ord.resid ~ xt)$coef[2]
      cov <- sparsity^2 * xxinv * tau * (1 - tau)
      scale <- 1/sparsity
      serr <- sqrt(diag(cov))
}
else if (se == "nid") {
      h <- bandwidth.rq(tau, n, hs = hs)
      if (tau + h > 1)
         stop("tau + h > 1:  error in summary.rq")
      if (tau - h < 0)
         stop("tau - h < 0:  error in summary.rq")
      # Fits at tau + h and tau - h; the difference in fitted values gives a
      # per-observation density estimate f.
      bhi <- rq.fit.fnb(x, y, tau = tau + h)$coef
      blo <- rq.fit.fnb(x, y, tau = tau - h)$coef
      dyhat <- x %*% (bhi - blo)
      if (any(dyhat <= 0))
         warning(paste(sum(dyhat <= 0), "non-positive fis"))
      f <- pmax(0, (2 * h)/(dyhat - eps))
      # fxxinv is the inverse of t(x) %*% diag(f) %*% x, again via QR.
      fxxinv <- diag(p)
      fxxinv <- backsolve(qr(sqrt(f) * x)$qr[1:p, 1:p, drop = FALSE],
         fxxinv)
      fxxinv <- fxxinv %*% t(fxxinv)
      # Sandwich form: fxxinv %*% crossprod(x) %*% fxxinv, scaled by
      # tau * (1 - tau).
      cov <- tau * (1 - tau) * fxxinv %*% crossprod(x) %*%
         fxxinv
      scale <- mean(f)
      serr <- sqrt(diag(cov))
}
else if (se == "ker") {
      h <- bandwidth.rq(tau, n, hs = hs)
      if (tau + h > 1)
         stop("tau + h > 1:  error in summary.rq")
      if (tau - h < 0)
         stop("tau - h < 0:  error in summary.rq")
      uhat <- c(y - x %*% coef)
      # h is converted into a bandwidth on the residual scale using the
      # smaller of the residual sd and IQR / 1.34.
      h <- (qnorm(tau + h) - qnorm(tau - h)) * min(sqrt(var(uhat)),
         (quantile(uhat, 0.75) - quantile(uhat, 0.25))/1.34)
      # Gaussian-kernel weight for each residual.
      f <- dnorm(uhat/h)/h
      fxxinv <- diag(p)
      fxxinv <- backsolve(qr(sqrt(f) * x)$qr[1:p, 1:p, drop = FALSE],
         fxxinv)
      fxxinv <- fxxinv %*% t(fxxinv)
      cov <- tau * (1 - tau) * fxxinv %*% crossprod(x) %*%
         fxxinv
      scale <- mean(f)
      serr <- sqrt(diag(cov))
}
else if (se == "boot") {
      # Bootstrap replicates from boot.rq(); their covariance gives serr.
      B <- boot.rq(x, y, tau, ...)
      cov <- cov(B)
      serr <- sqrt(diag(cov))
}
if (se == "rank") {
      coef <- f$coef
}
else {
      # Four-column table: estimate, standard error, t value and two-sided
      # p-value from a t distribution with rdf degrees of freedom.
      coef <- array(coef, c(p, 4))
      dimnames(coef) <- list(vnames, c("Value", "Std. Error",
         "t value", "Pr(>|t|)"))
      coef[, 2] <- serr
      coef[, 3] <- coef[, 1]/coef[, 2]
      coef[, 4] <- if (rdf > 0)
         2 * (1 - pt(abs(coef[, 3]), rdf))
      else NA
}
# Only call and terms are carried over from the fitted object.
object <- object[c("call", "terms")]
# NOTE(review): `cov` is only assigned in the iid/nid/ker/boot branches, so
# with se = "rank" and covariance = TRUE the name below does not refer to a
# covariance matrix computed here -- confirm the intended behaviour.
if (covariance == TRUE) {
      object$cov <- cov
      if (se == "iid")
         object$scale <- scale
      if (se %in% c("nid", "ker")) {
         object$Hinv <- fxxinv
         object$J <- crossprod(x)
         object$scale <- scale
      }
      else if (se == "boot") {
         object$B <- B
      }
}
object$coefficients <- coef
object$rdf <- rdf
object$tau <- tau
class(object) <- "summary.rq"
object
}

扫码加我拉你入群

请注明：姓名-公司-职位

以便审核进群资格，未注明则拒绝

全部回复

epoh

2010-10-17 11:33:45

Normal bootstrap works by creating data
by randomly selecting observations

Block bootstrap does this by randomly selecting
blocks of data, rather than individual data.

想了解一下,楼主是想保留data Block部分的当时特性?
如果是的话,等于是不用summary
重新编程,算出系数的standard errors.

扫码加我拉你入群

请注明：姓名-公司-职位

以便审核进群资格，未注明则拒绝

ywh19860616

2010-10-17 13:18:28

#epoh
嗯，我用了一个面板数据模型，因为我程序只运行出来系数，而没有给出对应的p值和t值。看了文献，文献提到使用block bootstrap抽样来计算standard errors，从而计算出对应的t值和p-value等拟合优度。我的目的就是这个。
我看了一些文献，说面板数据抽样和一般数据抽样存在不同，所以想问这应该怎么实现？

扫码加我拉你入群

请注明：姓名-公司-职位

以便审核进群资格，未注明则拒绝

epoh

2010-10-17 16:32:06

library(quantreg)

# Simulated cross-section: n observations, two regressors, quantile tau.
n <- 1000    # n observations
tau <- 0.4
y <- rnorm(n)
x <- matrix(rnorm(2 * n), n)
fit <- rq(y ~ x, tau = tau)
fit

# Reference: quantreg's built-in xy-pair bootstrap standard errors.
summary(fit, se = "boot", bsmethod = "xy")
#Coefficients:
#          Value Std. Error t value  Pr(>|t|)
#(Intercept) -0.24258  0.03358 -7.22396  0.00000
#x1             0.05268  0.03514 1.49913  0.13416
#x2             0.03694  0.02999 1.23160  0.21839

###### block bootstrap
b <- 1000    # number of bootstrap samples
block_length <- 50
# Derive the block count from the data instead of hard-coding it, and refuse
# to continue when the rows cannot be split into equal non-overlapping blocks.
num_blocks <- n / block_length
stopifnot(num_blocks == floor(num_blocks))

# One row per replicate, one column per coefficient (intercept + regressors).
boot_bhat <- matrix(NA_real_, b, ncol(x) + 1)

# Column j of Indices holds the row numbers of block j
# (block_length x num_blocks, i.e. 50 x 20 here).
Indices <- matrix(seq_len(n), block_length, num_blocks)

for (i in seq_len(b)) {
  # Choose which blocks to use, with replacement.
  randblock <- sample.int(num_blocks, num_blocks, replace = TRUE)
  # Row numbers of the selected blocks, flattened to a vector of length n.
  Ind_sim <- c(Indices[, randblock])
  Xsim <- x[Ind_sim, , drop = FALSE]    # resampled regressors
  Ysim <- y[Ind_sim]                    # resampled response (same rows)
  boot_bhat[i, ] <- rq(Ysim ~ Xsim, tau = tau)$coefficients
}

# Bootstrap mean of the coefficients.
bhat <- colMeans(boot_bhat)
bhat
#[1] -0.24659193  0.05058387  0.04053553

# Bootstrap covariance and standard errors.
cov <- cov(boot_bhat)
serr <- sqrt(diag(cov))
serr
# 0.02940081 0.03847271 0.03450373

扫码加我拉你入群

请注明：姓名-公司-职位

以便审核进群资格，未注明则拒绝

ywh19860616

2010-10-17 18:32:29

非常感谢#epoh，您写的很详细了，但是我初学R，不知道怎么转换成我想要的。
程序运行步骤是这样的，如下例：
m <- 3
n <- 10
s <- rep(1:n,rep(m,n))
x <- exp(rnorm(n*m))
X <- cbind(1,x)
u <- x*rnorm(m*n) + (1-x)*rf(m*n,3,3)
a <- rep(rnorm(n),rep(m,n))
y <- a + u
fit <- rq.fit.panel(X,y,s)
运行结果：
$coef
[1]  3.7116467 -2.6259010  3.8272535 -1.4451341  4.1059513 -0.9171851
[7] -3.8163941 -2.7247383 -3.2609396 -2.1087291 -1.8900982 -3.0358369
[13] -0.6968870 -0.1394642 -1.3196756 -2.5104425
$ierr  ##这是Error code for the internal Fortran routine srqfnc，不是标准误差
[1] 0
$it
[1] 10
$time
[1] 0
从上面运行结果可以看出，只有coef是我要的系数，但是其对应的t值和p-values未给出。所以想用
block bootstrap抽样得到。您上面程序我不知道如何套用。因为我运行时除了x，y，还有s(应该是用于识别面板结构的)。您能帮我再看看吗。
# Panel quantile-regression fitter, pasted without its name.
#   X       design matrix (coerced with as.matrix)
#   y       response vector
#   s       strata / individual indicator, one entry per observation
#   w       weights, one per quantile in `taus`
#   taus    quantiles fitted jointly
#   lambda  multiplier on the penalty rows for the strata effects
# Returns whatever rq.fit.sfn() returns for the stacked problem built below.
function(X,y,s,w=c(.25,.5,.25),taus=(1:3)/4,lambda = 1){

      require(SparseM)
      require(quantreg)
      K <- length(w)
      if(K != length(taus))
            stop("length of w and taus must match")
      X <- as.matrix(X)
      p <- ncol(X)
      # n = number of distinct strata, N = number of observations.
      n <- length(levels(as.factor(s)))
      N <- length(y)
      if(N != length(s) || N != nrow(X))
            stop("dimensions of y,X,s must match")
      # Sparse indicator matrix with one column per stratum.
      Z <- as.matrix.csr(model.matrix(~as.factor(s)-1))
      # Fidelity: one weighted copy of X per quantile (Kronecker product with
      # diag(w)), next to the weighted strata indicators.
      Fidelity <- cbind(as(w,"matrix.diag.csr") %x% X,w %x% Z)
      # Penalty: n extra rows, zero on the K*p slope columns and lambda times
      # an n x n diagonal on the strata columns.
      Penalty <- cbind(as.matrix.csr(0,n,K*p),lambda*as(n,"matrix.diag.csr"))
      D <- rbind(Fidelity,Penalty)
      # Response stacked to match D: weighted copies of y, then n zeros.
      y <- c(w %x% y,rep(0,n))
      # Right-hand side passed to rq.fit.sfn in place of its default.
      a <- c((w*(1-taus)) %x% (t(X)%*%rep(1,N)),
            sum(w*(1-taus)) * (t(Z) %*% rep(1,N)) + lambda * rep(1,n))
      rq.fit.sfn(D,y,rhs=a)
      }
# Printed source of the sparse quantile-regression fitter.
#   a        design matrix; the code reads its @dimension, @ia, @ja and @ra
#            slots (presumably a SparseM matrix.csr -- confirm)
#   y        response vector; its length must equal the number of rows of a
#   tau      quantile, default 0.5
#   rhs      right-hand side; defaults to (1 - tau) times the column sums of a
#   nsubmax, tmpmax, nnzlmax
#            work-space sizes; filled in below when not supplied
#   cachsz, small, maxiter
#            passed through to the Fortran routine
#   warn.mesg  if TRUE, a non-zero error code is turned into a warning
# Returns list(coef, ierr, it, time).
function (a, y, tau = 0.5, rhs = (1 - tau) * c(t(a) %*% rep(1,
length(y))), nsubmax, tmpmax, nnzlmax, cachsz = 64, small = 1e-06,
maxiter = 100, warn.mesg = TRUE)
{
# The response is negated here and the solution is negated again on return.
y <- -y
n <- length(y)
m <- a@dimension[2]
if (n != a@dimension[1])
      stop("Dimensions of design matrix and the response vector are not compatible")
u <- rep(1, length = n)
x <- rep((1 - tau), length = n)
# Number of stored entries of a, read from its row-pointer slot.
nnzdmax <- nnza <- a@ia[n + 1] - 1
iwmax <- 7 * m + 3
ao <- t(a)
e <- ao %*% a
nnzemax <- e@ia[m + 1] - 1
# Default work-space sizes derived from the sizes computed above.
if (missing(nsubmax))
      nsubmax <- nnzemax
if (missing(nnzlmax))
      nnzlmax <- 4 * nnzdmax
if (missing(tmpmax))
      tmpmax <- 6 * m
wwm <- vector("numeric", 3 * m)
s <- u - x
# Starting value b1 solves the normal equations (t(a) a) b = t(a) y.
b1 <- solve(e, ao %*% y, tmpmax = tmpmax, nnzlmax = nnzlmax,
      nsubmax = nsubmax)
r <- y - a %*% b1
# z is the positive part of r, shifted by `small` where |r| < small;
# w = z - r.
z <- ifelse(abs(r) < small, (r * (r > 0) + small), r * (r >
      0))
w <- z - r
# The first three columns of the n x 14 work array carry r, z and w.
wwn <- matrix(0, n, 14)
wwn[, 1] <- r
wwn[, 2] <- z
wwn[, 3] <- w
# Only sol, ierr, maxiter and time are kept from the Fortran call.
srqfnb.o <- .Fortran("srqfn", n = as.integer(n), m = as.integer(m),
      nnza = as.integer(nnza), a = as.double(a@ra), ja = as.integer(a@ja),
      ia = as.integer(a@ia), ao = as.double(ao@ra), jao = as.integer(ao@ja),
      iao = as.integer(ao@ia), nnzdmax = as.integer(nnzdmax),
      d = double(nnzdmax), jd = integer(nnzdmax), id = integer(m +
         1), dsub = double(nnzemax + 1), jdsub = integer(nnzemax +
         1), nnzemax = as.integer(nnzemax), e = as.double(e@ra),
      je = as.integer(e@ja), ie = as.integer(e@ia), nsubmax = as.integer(nsubmax),
      lindx = integer(nsubmax), xlindx = integer(m + 1), nnzlmax = as.integer(nnzlmax),
      lnz = double(nnzlmax), xlnz = integer(m + 1), iw = integer(m *
         5), iwmax = as.integer(iwmax), iwork = integer(iwmax),
      xsuper = integer(m + 1), tmpmax = as.integer(tmpmax),
      tmpvec = double(tmpmax), wwm = as.double(wwm), wwn = as.double(wwn),
      cachsz = as.integer(cachsz), level = as.integer(8), x = as.double(x),
      s = as.double(s), u = as.double(u), c = as.double(y),
      sol = as.double(b1), rhs = as.double(rhs), small = as.double(small),
      ierr = integer(1), maxiter = as.integer(maxiter), time = double(7),
      PACKAGE = "quantreg")[c("sol", "ierr", "maxiter", "time")]
ierr <- srqfnb.o$ierr
# A non-zero ierr is reported as a warning, not an error; the result is
# still returned.
if (!(ierr == 0) && warn.mesg)
      warning(sfnMessage(ierr))
# `it` is the maxiter value handed back by the routine; `time` is the sum of
# the seven timing slots.
list(coef = -srqfnb.o$sol, ierr = ierr, it = srqfnb.o$maxiter,
      time = sum(srqfnb.o$time))
}

扫码加我拉你入群

请注明：姓名-公司-职位

以便审核进群资格，未注明则拒绝

epoh

2010-10-17 20:25:01

library(quantreg)

# Simulated panel: n individuals with m observations each.
m <- 3
n <- 10
s <- rep(1:n, rep(m, n))
x <- exp(rnorm(n * m))
X <- cbind(1, x)
u <- x * rnorm(m * n) + (1 - x) * rf(m * n, 3, 3)
a <- rep(rnorm(n), rep(m, n))
y <- a + u

######### Sparse Regression Quantile Fitting
sX <- as.matrix.csr(X)
fit.sfn <- rq.fit.sfn(sX, y)
fit.sfn
#$coef
#[1]  0.7230244 -1.5599576

###### block bootstrap
n1 <- length(y)  #30
b <- 1000    # number of bootstrap samples
boot_bhat <- matrix(NA_real_, b, ncol(X))
block_length <- 10
num_blocks <- n1 / block_length  #3
# The rows must split into equal non-overlapping blocks.
stopifnot(num_blocks == floor(num_blocks))
# Column j of Indices holds the row numbers of block j.
Indices <- matrix(seq_len(n1), block_length, num_blocks)

for (i in seq_len(b)) {
  # Choose which blocks to use, with replacement.
  randblock <- sample.int(num_blocks, num_blocks, replace = TRUE)
  Ind_sim <- c(Indices[, randblock])    # rows of the selected blocks
  Xsim <- X[Ind_sim, , drop = FALSE]    # resampled design
  sXsim <- as.matrix.csr(Xsim)
  Ysim <- y[Ind_sim]                    # resampled response (same rows)
  # Refit on the resampled pair (sXsim, Ysim). Fitting against the original
  # y would pair each resampled row of X with the wrong response.
  boot_bhat[i, ] <- rq.fit.sfn(sXsim, Ysim)$coef
}
bhat <- colMeans(boot_bhat)
#bhat
cov <- cov(boot_bhat)
serr <- sqrt(diag(cov))
#serr

# Coefficient table in the layout used by summary.rq.
# NOTE(review): "Value" is the bootstrap mean, not fit.sfn$coef -- confirm
# which of the two should be reported.
p <- length(bhat)
rdf <- n1 - p
# x is a plain vector and X was built with cbind(1, x), so the row labels
# are given explicitly.
vnames <- c("(Intercept)", "x")
coef <- array(bhat, c(p, 4))
dimnames(coef) <- list(vnames, c("Value", "Std. Error", "t value", "Pr(>|t|)"))
coef[, 2] <- serr
coef[, 3] <- coef[, 1] / coef[, 2]
coef[, 4] <- if (rdf > 0) 2 * (1 - pt(abs(coef[, 3]), rdf)) else NA
coef

#         Value       Std. Error    t value       Pr(>|t|)
#[1,]  0.03135153  0.7788350  0.04025439 0.9681760
#[2,] -0.52056648  0.8418555 -0.61835611 0.5413385

##############################
rq.fit.panel <- function(X, y, s, w = c(.25, .5, .25), taus = (1:3) / 4,
                         lambda = 1) {
  # Prototype function for panel data fitting of QR models.
  #
  # Arguments:
  #   X       design matrix, assumed to contain an intercept column
  #   y       response vector
  #   s       strata indicator, assumed (so far) to be a one-way layout
  #   w       weights, one per quantile in `taus`
  #   taus    quantiles estimated jointly
  #   lambda  shrinkage parameter applied to the fixed effects
  #
  # Returns the list produced by rq.fit.sfn() for the stacked problem.
  #
  # NB:
  # 1.  The value of the shrinkage parameter lambda is an open research
  #     problem; in the simplest homogeneous settings it should be the ratio
  #     of the scale parameters of the fixed effects and the idiosyncratic
  #     errors.
  # 2.  On return the coefficient vector has m*p + n elements where m is the
  #     number of quantiles being estimated, p is the number of columns of X,
  #     and n is the number of distinct values of s.  The first m*p
  #     coefficients are the slope estimates, and the last n are the
  #     "fixed effects".
  # 3.  Like all shrinkage (regularization) estimators, asymptotic inference
  #     is somewhat problematic... so the bootstrap is the natural first
  #     resort.

  # library() stops with a clear message when a package is missing, whereas
  # require() only returns FALSE and the failure shows up later.
  library(SparseM)
  library(quantreg)

  K <- length(w)
  if (K != length(taus))
    stop("length of w and taus must match")
  X <- as.matrix(X)
  p <- ncol(X)
  strata <- as.factor(s)
  n <- nlevels(strata)
  N <- length(y)
  if (N != length(s) || N != nrow(X))
    stop("dimensions of y,X,s must match")

  # Sparse indicator matrix with one column per stratum.
  Z <- as.matrix.csr(model.matrix(~ strata - 1))
  # One weighted copy of X per quantile, next to the weighted indicators.
  Fidelity <- cbind(as(w, "matrix.diag.csr") %x% X, w %x% Z)
  # n penalty rows: zero on the K*p slope columns, lambda on the diagonal of
  # the fixed-effect columns.
  Penalty <- cbind(as.matrix.csr(0, n, K * p),
                   lambda * as(n, "matrix.diag.csr"))
  D <- rbind(Fidelity, Penalty)
  # Response stacked to match D: weighted copies of y, then n zeros.
  y <- c(w %x% y, rep(0, n))
  # Right-hand side passed to rq.fit.sfn in place of its default.
  a <- c((w * (1 - taus)) %x% (t(X) %*% rep(1, N)),
         sum(w * (1 - taus)) * (t(Z) %*% rep(1, N)) + lambda * rep(1, n))
  rq.fit.sfn(D, y, rhs = a)
}
#########rq.fit.panel
fit.panel <- rq.fit.panel(X, y, s)
fit.panel
#$coef
#[1]  3.5696496 -3.8264048  3.0765997 -2.0488377  2.9257768 -0.8172033 -3.6342146 -3.6971347
#[9] -0.1692880 -0.5130398 -2.1588006 -0.9896829 -3.6029453 -0.9873640 -1.0739194 -1.5147111

###### block bootstrap (one block = all rows of one individual)
n1 <- length(y)  #30
b <- 1000    # number of bootstrap samples
# Take the coefficient count from the fit instead of hard-coding it.
ncoef <- length(fit.panel$coef)
boot_bhat <- matrix(NA_real_, b, ncoef)

# Row numbers of each individual. Blocks follow the panel identifier s, so
# no block cuts across individuals and unbalanced panels also work.
blocks <- split(seq_len(n1), s)
num_blocks <- length(blocks)

for (i in seq_len(b)) {
  # Draw individuals with replacement.
  randblock <- sample.int(num_blocks, num_blocks, replace = TRUE)
  drawn <- blocks[randblock]
  Ind_sim <- unlist(drawn, use.names = FALSE)    # rows of the drawn blocks
  # Relabel the drawn individuals 1..num_blocks. Every draw (including a
  # repeated individual) gets its own stratum, so the strata line up with
  # the resampled rows and the coefficient vector keeps length ncoef.
  s_sim <- rep(seq_len(num_blocks), lengths(drawn))
  Xsim <- X[Ind_sim, , drop = FALSE]    # resampled design
  Ysim <- y[Ind_sim]                    # resampled response (same rows)
  boot_bhat[i, ] <- rq.fit.panel(Xsim, Ysim, s_sim)$coef
}
bhat <- colMeans(boot_bhat)
#bhat

cov <- cov(boot_bhat)
serr <- sqrt(diag(cov))
#serr

# Coefficient table in the layout used by summary.rq.
p <- length(bhat)
rdf <- n1 - p
# Labels: slopes first, grouped by quantile, then one entry per stratum.
# The stratum entries are indexed by draw position, not by original
# individual, so only the slope rows are meaningful for inference.
p_x <- ncol(X)
K <- (ncoef - num_blocks) / p_x
vnames <- c(
  paste0("tau", rep(seq_len(K), each = p_x), ".b", rep(seq_len(p_x), times = K)),
  paste0("fe", seq_len(num_blocks))
)
coef <- array(bhat, c(p, 4))
dimnames(coef) <- list(vnames, c("Value", "Std. Error", "t value", "Pr(>|t|)"))
coef[, 2] <- serr
coef[, 3] <- coef[, 1] / coef[, 2]
coef[, 4] <- if (rdf > 0) 2 * (1 - pt(abs(coef[, 3]), rdf)) else NA
coef
> coef
         Value          Std. Error t value       Pr(>|t|)
[1,]  3.5833974 1.224927  2.925397 0.011071952
[2,] -3.2408558 0.836343 -3.875032 0.001682470
[3,]  3.4498021 1.443538  2.389824 0.031476152
[4,] -1.7417926 1.514311 -1.150221 0.269321804
[5,]  3.2779020 1.430895  2.290806 0.038006777
[6,] -0.3387540 1.453237 -0.233103 0.819055078
[7,] -2.8649529 1.526798 -1.876445 0.081595934
[8,] -2.2847649 1.831747 -1.247315 0.232744869
[9,] -2.2825497 1.571369 -1.452587 0.168381907
[10,] -2.6825952 0.885694 -3.028806 0.009021254
[11,] -2.6366763 1.210942 -2.177375 0.047051236
[12,] -1.1954863 0.885605 -1.349909 0.198467848
[13,] -3.6453777 1.662293 -2.192982 0.045697269
[14,] -1.7934437 1.630096 -1.100208 0.289800173
[15,] -1.7644270 1.041941 -1.693405 0.112501121
[16,] -2.6937529 1.733657 -1.553798 0.142542035

扫码加我拉你入群

请注明：姓名-公司-职位

以便审核进群资格，未注明则拒绝

点击查看更多内容…

ywh19860616

2010-10-17 20:58:31

#epoh
感谢您的热心解答，非常感谢

扫码加我拉你入群

请注明：姓名-公司-职位

以便审核进群资格，未注明则拒绝

zhangtao

2010-10-17 22:08:42

非常佩服epoh,为什么我的R运行结果如下？
> library(quantreg)
> m <- 3
> n <- 10
> s <- rep(1:n,rep(m,n))
> x <- exp(rnorm(n*m))
> X <- cbind(1,x)
> u <- x*rnorm(m*n) + (1-x)*rf(m*n,3,3)
> a <- rep(rnorm(n),rep(m,n))
> y <- a + u
> #########Sparse Regression Quantile Fitting
> sX <- as.matrix.csr(X)
> fit.sfn=rq.fit.sfn(sX, y)
> fit.sfn
$coef
[1]  1.5907851 -0.9816827

$ierr
[1] 0

$it
[1] 8

$time
[1] 0

> #$coef
> #[1]  0.7230244 -1.5599576
>
> ######block bootstrap
> n1=length(y)  #30
> b=1000    #Number of bootstrap samples
> boot_bhat=matrix(NA,b, dim(X)[2])
> block_length = 10
> num_blocks = n1/block_length #n/block_length  #3
> Indices = seq(1:n1)  # All of the indices from 1 to n
> Indices = matrix(Indices,block_length,num_blocks)
> for (i in 1:b){       #Number of bootstrap samples
+ randblock =sample(seq(1:num_blocks),num_blocks,replace = TRUE) # Choose which blocks to use
+ Ind_sim = Indices[,randblock]    #Find which data are in each block
+ Ind_sim = c(Ind_sim)
+ Xsim = X[Ind_sim,1:2]    #Construct the x data
+ sXsim <- as.matrix.csr(Xsim)
+ Ysim = y[Ind_sim]       #Construct the y data
+ boot_bhat[i,] = rq.fit.sfn(sXsim, y)$coef
+ }
> bhat=colMeans(boot_bhat)
> #bhat
> cov=cov(boot_bhat)
> serr = sqrt(diag(cov))
> #serr
>
> p <- length(bhat)
> rdf <- n1 - p
> vnames<- dimnames(x)[[2]]
> coef <- array(bhat, c(p, 4))
> dimnames(coef) <- list(vnames, c("Value", "Std. Error", "t value","Pr(>|t|)"))
> coef[, 2] <- serr
> coef[, 3] <- coef[, 1]/coef[, 2]
> coef[, 4] <- if (rdf > 0) 2 * (1 - pt(abs(coef[, 3]), rdf))
> coef
         Value Std. Error    t value  Pr(>|t|)
[1,] -0.01807533  1.0159156 -0.01779216 0.9859308
[2,] -0.16323621  0.5183736 -0.31490071 0.7551718
>
>             Value       Std. Error    t value       Pr(>|t|)
错误: 意外的符号在"          Value       Std."里
> #[1,]  0.03135153  0.7788350  0.04025439 0.9681760
> #[2,] -0.52056648  0.8418555 -0.61835611 0.5413385
>
> ##############################
> rq.fit.panel<-function(X,y,s,w=c(.25,.5,.25),taus=(1:3)/4,lambda = 1){
+ # prototype function for panel data fitting of QR models
+ # the matrix X is assumed to contain an intercept
+ # the vector s is a strata indicator assumed (so far) to be a one-way layout
+ # NB:
+ # 1.  The value of the shrinkage parameter lambda is an open research problem in
+ #    the simplest homogneous settings it should be the ratio of the scale parameters
+ #    of the fixed effects and the idiocyncratic errors
+ # 2.  On return the coefficient vector has m*p + n elements where m is the number
+ #    quantiles being estimated, p is the number of colums of X, and n is the
+ #    number of distinct values of s.  The first m*p coefficients are the
+ #    slope estimates, and the last n are the "fixed effects"
+ # 3.  Like all shrinkage (regularization) estimators, asymptotic inference is somewhat
+ #    problematic... so the bootstrap is the natural first resort.
+
+
+       require(SparseM)
+       require(quantreg)
+       K <- length(w)
+       if(K != length(taus))
+                stop("length of w and taus must match")
+       X <- as.matrix(X)
+       p <- ncol(X)
+       n <- length(levels(as.factor(s)))
+       N <- length(y)
+       if(N != length(s) || N != nrow(X))
+                stop("dimensions of y,X,s must match")
+       Z <- as.matrix.csr(model.matrix(~as.factor(s)-1))
+       Fidelity <- cbind(as(w,"matrix.diag.csr") %x% X,w %x% Z)
+       Penalty <- cbind(as.matrix.csr(0,n,K*p),lambda*as(n,"matrix.diag.csr"))
+       D <- rbind(Fidelity,Penalty)
+       y <- c(w %x% y,rep(0,n))
+       a <- c((w*(1-taus)) %x% (t(X)%*%rep(1,N)),
+                sum(w*(1-taus)) * (t(Z) %*% rep(1,N)) + lambda * rep(1,n))
+       rq.fit.sfn(D,y,rhs=a)
+       }
> #########rq.fit.panel
> fit.panel <- rq.fit.panel(X,y,s)
> fit.panel
$coef
[1]  7.6260607 -2.1070905  7.5975820 -0.9305439  7.9502965 -0.6942313
[7] -8.0326189 -8.3645710 -6.1608858 -7.8483822 -9.1936181 -6.8814008
[13] -2.8142175 -4.5646799 -5.0150503 -5.2894871

$ierr
[1] 0

$it
[1] 9

$time
[1] 0

> #$coef
> #[1]  3.5696496 -3.8264048  3.0765997 -2.0488377  2.9257768 -0.8172033 -3.6342146 -3.6971347
> #[9] -0.1692880 -0.5130398 -2.1588006 -0.9896829 -3.6029453 -0.9873640 -1.0739194 -1.5147111
>
> ######block bootstrap
> n1=length(y)  #30
> b=1000    #Number of bootstrap samples
> ncoef=16
> boot_bhat=matrix(NA,b, ncoef)
> block_length = 10
> num_blocks = n1/block_length #n/block_length
> Indices = seq(1:n1)  # All of the indices from 1 to n
> Indices = matrix(Indices,block_length,num_blocks)
> for (i in 1:b){       #Number of bootstrap samples
+ randblock =sample(seq(1:num_blocks),num_blocks,replace = TRUE) # Choose which blocks to use
+ Ind_sim = Indices[,randblock]    #Find which data are in each block
+ Ind_sim = c(Ind_sim)
+ Xsim = X[Ind_sim,1:dim(X)[2]]    #Construct the x data
+ Ysim = y[Ind_sim]       #Construct the y data
+ boot_bhat[i,] = rq.fit.panel(Xsim,Ysim,s)$coef
+ }
共有43个警告 (用warnings()来显示)
> bhat=colMeans(boot_bhat)
> #bhat
>
> cov=cov(boot_bhat)
> serr = sqrt(diag(cov))
> #serr
>
> p <- length(bhat)
> rdf <- n1 - p
> vnames<- dimnames(x)[[2]]
> coef <- array(bhat, c(p, 4))
> dimnames(coef) <- list(vnames, c("Value", "Std. Error", "t value","Pr(>|t|)"))
> coef[, 2] <- serr
> coef[, 3] <- coef[, 1]/coef[, 2]
> coef[, 4] <- if (rdf > 0) 2 * (1 - pt(abs(coef[, 3]), rdf))
> coef
      Value Std. Error t value  Pr(>|t|)
[1,]  7.534053 4.751090  1.585753 0.1351177
[2,] -2.856759 2.167977 -1.317707 0.2087586
[3,]  7.426722 4.896351  1.516787 0.1515725
[4,] -1.594420 1.660894 -0.959977 0.3533539
[5,]  7.847167 4.793434  1.637066 0.1238897
[6,] -1.187242 1.503429 -0.789689 0.4428796
[7,] -5.581834 4.066550 -1.372621 0.1914601
[8,] -5.812536 4.126287 -1.408660 0.1807565
[9,] -5.574963 3.945628 -1.412947 0.1795168
[10,] -5.235239 3.849973 -1.359812 0.1953872
[11,] -6.375304 3.684595 -1.730259 0.1055559
[12,] -5.187254 4.007587 -1.294359 0.2164851
[13,] -3.887769 2.977477 -1.305726 0.2126952
[14,] -5.792848 3.690437 -1.569691 0.1388069
[15,] -4.975033 3.700436 -1.344445 0.2001844
[16,] -5.451939 3.380240 -1.612885 0.1290763
> > coef
错误: 意外的'>'在">"里

扫码加我拉你入群

请注明：姓名-公司-职位

以便审核进群资格，未注明则拒绝

ywh19860616

2010-10-18 07:35:00

楼上的，我运行不会出现这个问题

#epoh ，谢谢您，能帮我再看下这个问题吗
我想问一下，block_length = 10 这个是如何定的？是根据n=10来确定的吗？即按时间或者截面个数来确定吗？
就是block bootstrap分块是按照什么标准来分的？您能否推荐一些文献。您帮我编写的程序是按下图的抽样方法吗？

扫码加我拉你入群

请注明：姓名-公司-职位

以便审核进群资格，未注明则拒绝

epoh

2010-10-18 09:45:08

block_length = 10,只是为了程序运行,
随意订的.
block_length,应该跟数据本质有关,
譬如一星期,一个月,一季,一年.

你上传的是最基本的bootstrap
假设数据data是:
   x    y
1  10 100
2  20 200
3  30 300
4  40 400
5  50 500
6  60 600
7  70 700
8  80 800
9  90 900
10 99 999

n=10
index=sample(seq(1:n),n,replace = TRUE)
index
#[1]  7  7  9  5  9  2 10 10  9  5
#随机抽样后,取出又放回,所以replace = TRUE
#抽出后的新样本就是:
data[index,]
      x    y
[1,] 70 700
[2,] 70 700
[3,] 90 900
[4,] 50 500
[5,] 90 900
[6,] 20 200
[7,] 99 999
[8,] 99 999
[9,] 90 900
[10,] 50 500

然后以新样本回归,再重新抽样,进行B次

扫码加我拉你入群

请注明：姓名-公司-职位

以便审核进群资格，未注明则拒绝

ywh19860616

2010-10-18 10:35:52

#epoh,谢谢您的热心帮助

扫码加我拉你入群

请注明：姓名-公司-职位

以便审核进群资格，未注明则拒绝

zhangtao

2010-10-18 21:52:36

ywh19860616 朋友，能不能把你在9楼所附文件的完整文章传上来，我学习学习，非常感谢！
要是不方便，就算了，还是非常感谢你！

扫码加我拉你入群

请注明：姓名-公司-职位

以便审核进群资格，未注明则拒绝

ywh19860616

2010-10-19 13:21:28

zhangtao兄弟，完整文章我就不上传了，同学的
您可以搜索，作者：林馨怡，国立政治大学经济学系

扫码加我拉你入群

请注明：姓名-公司-职位

以便审核进群资格，未注明则拒绝

zhangtao

2010-10-19 16:04:04

非常非常感谢！

扫码加我拉你入群

请注明：姓名-公司-职位

以便审核进群资格，未注明则拒绝

epoh

2010-10-19 17:00:46

Block Bootstrap:resample blocks instead of single elements.
                        the dependence structure is preserved.

##An illustration of "Moving Block Bootstrap";
##the blue points in the three yellow rectangles
##(blocks) are bootstrap samples.

# Animated picture of the moving block bootstrap: every overlapping window of
# block_len consecutive points is outlined in gray, then three windows drawn
# at random (with replacement) are filled yellow with their points in blue.
x <- 5 * sin(seq(0, pi, length.out = 90)) + rnorm(90)
block_len <- 30
n_starts <- length(x) - block_len + 1    # number of possible block starts

plot(x, main = "Illustration of Moving Block Bootstrap (MBB)")
for (idx in seq_len(n_starts)) {
  win <- idx:(idx + block_len - 1)
  rect(idx, min(x[win]), idx + block_len - 1, max(x[win]), border = "gray")
  Sys.sleep(0.2)    # pause so the sliding window is visible
}

# Starting positions of the three sampled blocks.
bt <- sample.int(n_starts, 3, replace = TRUE)
for (start in bt) {
  win <- start:(start + block_len - 1)
  rect(start, min(x[win]), start + block_len - 1, max(x[win]), col = "yellow")
  points(win, x[win], col = "blue", pch = 19)
}

##optimal Block-Length Selection
Automatic Block-Length Selection for the Dependent Bootstrap
http://econ.duke.edu/~ap172/Politis_White_2004.pdf

CORRECTION TO “AUTOMATIC BLOCK-LENGTH SELECTION...."
http://econ.duke.edu/~ap172/Patton_Politis_White_2009.pdf

## ppw.R
## $Id: ppw.R,v 1.47 2008/12/12 14:52:17 jracine Exp jracine $

## Original code in Matlab by A. Patton, R translation and
## modifications by C. Parmeter and J. Racine.
##
## We are grateful to Andrew Patton and Dimitris Politis for their
## assistance and feedback. Kindly report features, deficiencies, and
## improvements to racinej@mcmaster.ca.
##
## The citation is A. Patton, D.N. Politis, and H. White (2008,
## forthcoming), "CORRECTION TO `Automatic Block-Length Selection for
## the Dependent Bootstrap' by D.N. Politis and H. White". This is
## based on the article by Politis, D.N., and H. White (2004),
## "Automatic block-length selection for the dependent bootstrap."
## Econometric Reviews, vol. 23.
##
## INPUTS:  data, an n x k matrix.
##
## OUTPUTS: b.star, a 2 x k vector of optimal bootstrap block lengths
## for the stationary bootstrap and circular bootstrap (BstarSB,
## BstarCB).

## The function lam() is used to construct a "flat-top" lag window for
## spectral estimation based on Politis, D.N. and J.P. Romano (1995),
## "Bias-Corrected Nonparametric Spectral Estimation", Journal of Time
## Series Analysis, vol. 16, No. 1.

## Flat-top lag window: 1 for |s| < 0.5, 2 * (1 - |s|) for 0.5 <= |s| <= 1,
## and 0 beyond that. Vectorised over s.
lam <- function(s) {
  a <- abs(s)
  flat <- (a >= 0) * (a < 0.5)
  taper <- 2 * (1 - a) * (a >= 0.5) * (a <= 1)
  flat + taper
}

## The function b.star() returns the optimal bootstrap block
## lengths. Note that an example for usage appears at the bottom of
## this file. If you use this function as input into a routine such as
## tsboot() in the boot library (Angelo Canty and Brian Ripley
## (2008). boot: Bootstrap R (S-Plus) Functions. R package version
## 1.2-34.) you ought to use the option round=TRUE.
##
## Arguments:
##   data   vector or n x k matrix / data frame; each column is treated as
##          a separate series
##   Kn     run length of insignificant autocorrelations used to pick mhat
##   mmax   largest lag considered
##   Bmax   upper bound applied to the returned block lengths
##   c      multiplier in the critical value c * sqrt(log10(n) / n)
##   round  if TRUE, return integer block lengths (at least 1)
##
## Returns a k x 2 matrix with columns BstarSB (stationary bootstrap) and
## BstarCB (circular bootstrap), one row per column of data.

b.star <- function(data,
                   Kn = NULL,
                   mmax = NULL,
                   Bmax = NULL,
                   c = NULL,
                   round = FALSE) {

  ## Convert the data object to a data frame to handle both vectors
  ## and matrices.

  data <- data.frame(data)
  n <- nrow(data)
  k <- ncol(data)

  ## Set Defaults. Note that in footnote c, page 59, for Kn Politis
  ## and White (2004) use max(5,log10(n)). Since this must be an
  ## integer we use ceiling(log10(n)).

  if (is.null(Kn)) Kn <- max(5, ceiling(log10(n)))
  if (is.null(mmax)) mmax <- ceiling(sqrt(n)) + Kn
  if (is.null(Bmax)) Bmax <- ceiling(min(3 * sqrt(n), n / 3))
  if (is.null(c)) c <- qnorm(0.975)

  ## Create two vectors of length k in which we store results.

  BstarSB <- numeric(length = k)
  BstarCB <- numeric(length = k)

  ## Now we loop through each variable in data (i.e., column,
  ## data[,i]).

  for (i in seq_len(k)) {

    ## We first obtain the autocorrelations rho(1),...,rho(mmax) (we
    ## need to drop the first autocorrelation as it is rho(0), hence
    ## acf[-1]). This is the default in acf [type="correlation"]. Note
    ## that Patton uses sample correlations after dropping the first
    ## mmax observations, while we instead use the acf to obtain
    ## rho(k).

    rho.k <- acf(data[, i],
                 lag.max = mmax,
                 type = "correlation",
                 plot = FALSE)$acf[-1]

    ## Next we compute mhat. The use of c*sqrt(log10(n)/n) for
    ## critical values is given in footnote c of Politis and White
    ## (2004, page 59), and the approach for determining mhat is
    ## described in footnote c.

    rho.k.crit <- c * sqrt(log10(n) / n)

    ## Compute the number of insignificant runs following each rho(k),
    ## k=1,...,mmax.

    insignificant <- abs(rho.k) < rho.k.crit
    num.insignificant <- vapply(
      seq_len(mmax - Kn + 1),
      function(j) sum(insignificant[j:(j + Kn - 1)]),
      integer(1)
    )

    if (any(num.insignificant == Kn)) {

      ## If there are any values of rho(k) for which the Kn proceeding
      ## values of rho(k+j), j=1,...,Kn are all insignificant, take the
      ## smallest rho(k) such that this holds (see footnote c for
      ## further details).

      mhat <- which(num.insignificant == Kn)[1]

    } else if (any(abs(rho.k) > rho.k.crit)) {

      ## If no runs of length Kn are insignificant, take the largest
      ## significant lag (with a single significant lag this is that
      ## lag itself).

      mhat <- max(which(abs(rho.k) > rho.k.crit))

    } else {

      ## When there are no significant lags, mhat must be the
      ## smallest positive integer (footnote c), hence mhat is set
      ## to one.

      mhat <- 1

    }

    ## Compute M (mhat is at least one), capped at mmax.

    M <- min(2 * mhat, mmax)

    ## We compute BstarSB and BstarCB using the formulas in the above
    ## references. Now we require the autocovariance R(k) (hence
    ## type="covariance" in the ccf call). Note that Patton uses
    ## sample covariances after dropping the first mmax observations,
    ## while we instead use type="covariance" to obtain R(k) for
    ## lags -M,...,M, including R(0).

    kk <- seq(-M, M)

    R.k <- ccf(data[, i], data[, i],
               lag.max = M,
               type = "covariance",
               plot = FALSE)$acf

    Ghat <- sum(lam(kk / M) * abs(kk) * R.k)
    DCBhat <- 4 / 3 * sum(lam(kk / M) * R.k)^2
    DSBhat <- 2 * sum(lam(kk / M) * R.k)^2

    ## Store the result for THIS column. Assigning to the whole vector
    ## would keep only the last column's value.

    BstarSB[i] <- ((2 * Ghat^2) / DSBhat)^(1 / 3) * n^(1 / 3)
    BstarCB[i] <- ((2 * Ghat^2) / DCBhat)^(1 / 3) * n^(1 / 3)

  }

  ## The user can choose whether they want rounded values returned or
  ## not. BstarCB is rounded up, BstarSB simply rounded but both must
  ## be positive integers.

  if (round == FALSE) {

    BstarSB <- ifelse(BstarSB > Bmax, Bmax, BstarSB)
    BstarCB <- ifelse(BstarCB > Bmax, Bmax, BstarCB)

  } else {

    BstarSB <- ifelse(BstarSB > Bmax, Bmax,
                      ifelse(BstarSB < 1, 1, round(BstarSB)))
    BstarCB <- ifelse(BstarCB > Bmax, Bmax,
                      ifelse(BstarCB < 1, 1, ceiling(BstarCB)))

  }

  return(cbind(BstarSB, BstarCB))

}

## Here is a simple example with an n x 2 matrix containing n=10^5
## observations, where column 2 of x is more persistent than column
## 1. This requires that you first install the forecast library (i.e.,
## install.packages("forecast")).
##
##  library(forecast)
##  set.seed(123)
##  x <- cbind(arima.sim(n = 100000, list(ar = c(.5,.0), ma = c(0,0)),sd = 1),
##          arima.sim(n = 100000, list(ar = c(.5,.4), ma = c(0,0)),sd = 1))
##  b.star(x)
##  b.star(x,round=TRUE)
##> b.star(x)
##    BstarSB BstarCB
##[1,]  50.39272  57.68526
##[2,] 251.62894 288.04323
##> b.star(x,round=TRUE)
##    BstarSB BstarCB
##[1,]    50    58
##[2,]    252    289

扫码加我拉你入群

请注明：姓名-公司-职位

以便审核进群资格，未注明则拒绝

ywh19860616

2010-10-19 17:52:06

#epoh
我知道这个是自动选择分块数的程序b.star。但是对于面板数据，解释变量多于1个时，好像不适用
您看这个结果：
##    BstarSB BstarCB
##[1,]    50    58
##[2,]    252    289
对应2个解释变量，有不同分法的，这应该是不可以的。因为我数据y x1 x2肯定是放在一起抽样的，才会对应
而不是分开一个一个抽样。您觉得我的说法对吗？
问题：
如果我想把这个程序用于我的问题，应该怎么解决？
如果这个程序不能用，我的数据是以年度为单位的。block_length应该怎么选择为好，是直接选择等于截面个数吗？我觉得如果直接选择截面个数了，那就相当于在每个截面分别抽取个数相同的样本，这样可以理解。
在实际应用中，我的数据是如下排列的（也是R中面板数据排列格式），如数据格式为：
id  year
1 1997
1 1998
1 1999
1 2000
1 2001
2 1997
2 1998
2 1999
2 2000
2 2001
我取block-length=5，那就相当于以每个截面抽样，您帮我写的算法是这样的吗

扫码加我拉你入群

请注明：姓名-公司-职位

以便审核进群资格，未注明则拒绝

楚韵荆风

2010-10-19 19:30:06

看来都是R高手，学习了！

扫码加我拉你入群

请注明：姓名-公司-职位

以便审核进群资格，未注明则拒绝

epoh

2010-10-19 19:32:05

看看package "plm" Linear Models for Panel Data
是不是适合你
data(EmplUK)
   firm year sector    emp wage capital output
1    1 1977    7 5.041 13.1516  0.5894  95.7072
2    1 1978    7 5.600 12.3018  0.6318  97.3569
3    1 1979    7 5.015 12.8395  0.6771  99.6083
4    1 1980    7 4.715 13.8039  0.6171 100.5501
5    1 1981    7 4.093 14.2897  0.5076  99.5581
6    1 1982    7 3.166 14.8681  0.4229  98.6151
7    1 1983    7 2.936 13.7784  0.3920 100.0301
8    2 1977    7  71.319 14.7909 16.9363  95.7072
9    2 1978    7  70.643 14.1036 17.2422  97.3569
10    2 1979    7  70.918 14.9534 17.5413  99.6083
11    2 1980    7  72.031 15.4910 17.6574 100.5501
12    2 1981    7  73.689 16.1969 16.7133  99.5581
13    2 1982    7  72.419 16.1314 16.2469  98.6151
14    2 1983    7  68.518 16.3051 17.3696 100.0301
15    3 1977    7  19.156 22.6920  7.0975  95.7072
..........

# Dynamic panel GMM example on the EmplUK data shipped with plm.
library(plm)
data("EmplUK", package="plm")

# One-step GMM with two-way effects: log(emp) on log(wage) and log(capital),
# with the lag structure given by list(1,1,1) and instruments taken from the
# same three variables at lags 2 to 99.
# NOTE(review): dynformula(), gmm.inst and lag.gmm are the plm interface of
# the time of this post -- confirm they are still accepted by the installed
# plm version.
z2 <- pgmm(dynformula(log(emp)~log(wage)+log(capital),list(1,1,1)),
   data=EmplUK, effect="twoways", model="onestep",
   gmm.inst=~log(emp)+log(wage)+log(capital),lag.gmm=c(2,99),
   transformation="ld")
# robust=TRUE is passed to the summary method of the fit.
summary(z2,robust=TRUE)

Twoways effects One step model

Call:
pgmm(formula = dynformula(log(emp) ~ log(wage) + log(capital),
list(1, 1, 1)), data = EmplUK, effect = "twoways", model = "onestep",
gmm.inst = ~log(emp) + log(wage) + log(capital), lag.gmm = c(2,
      99), transformation = "ld")

Unbalanced Panel: n=140, T=7-9, N=1031

Number of Observations Used:  891

Residuals
   Min. 1st Qu.    Median    Mean 3rd Qu.    Max.
-6.571e-01 -4.703e-02  5.937e-03 -2.110e-13  5.784e-02  5.045e-01

Coefficients
                        Estimate    Std. Error z-value  Pr(>|z|)
lag(log(emp), 1)    0.935605 0.026295 35.5810 < 2.2e-16 ***
log(wage)             -0.630976 0.118054  -5.3448 9.050e-08 ***
lag(log(wage), 1) 0.482620 0.136887 3.5257 0.0004224 ***
log(capital)          0.483930 0.053867 8.9838 < 2.2e-16 ***
lag(log(capital), 1) -0.424393 0.058479 -7.2572 3.952e-13 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Sargan Test: chisq(100) = 236.0249 (p.value=5.2115e-13)
Autocorrelation test (1): normal = -4.808434 (p.value=7.6059e-07)
Autocorrelation test (2): normal = -0.2800133 (p.value=0.38973)
Wald test for coefficients: chisq(5) = 11174.82 (p.value=< 2.22e-16)

扫码加我拉你入群

请注明：姓名-公司-职位

以便审核进群资格，未注明则拒绝

ywh19860616

2010-10-19 19:37:40

#epoh，谢谢您，我不是为了估计面板数据
我上面列出的只是数据格式，想问问怎么抽样？因为我发现b.star不适合有两个解释变量（X1和X2）的情形，
因为结果有两种：
##    BstarSB BstarCB
##[1,]    50    58
##[2,]    252    289

呵呵，让你费心了

扫码加我拉你入群

请注明：姓名-公司-职位

以便审核进群资格，未注明则拒绝

epoh

2010-10-19 21:38:00

BOOTSTRAP FOR PANEL DATA MODELS.pdf

BOOTSTRAP FOR PANEL DATA MODELS.pdf
大小:(199.57 KB)

马上下载

page 3/30
A panel dataset with N individuals
and T time periods is represented
by a matrix Y of N rows and T columns.

page 6/28 Block Bootstrap
Assume that T = Kl, with l the length of a block,
then there are K non-overlapping blocks.

依你的数据 year1997,1998,1999,2000,2001
T=5,K=5,l=1
l没的选了.

扫码加我拉你入群

请注明：姓名-公司-职位

以便审核进群资格，未注明则拒绝

ywh19860616

2010-10-19 22:38:07

#epoh ，谢谢，和你学到抽样的很多知识
呵呵，那数据是我假设的，经过您的提醒，大致知道怎么选择block-length。
您看我的想法是否正确？
假设我有20个个体，6年的数据。
我按照R中面板数据格式排列为：
id year y
1 1991 0.1
1 1992 0.2
1 1993 0.3
1 1994 0.4
1 1995 0.5
1 1996 0.6
...............
20 1991
20
20
20
20
20 1996
那么我这里T是6，如果我选择block-length=3，那么K=2。这样的意思就是把每个个体1991-1996年的数据分为2块，
再在每块中抽样，是这样吗？

扫码加我拉你入群

请注明：姓名-公司-职位

以便审核进群资格，未注明则拒绝

epoh

2010-10-20 08:19:29

b.star是根据文献
Automatic Block-Length Selection for the "Dependent Bootstrap"
很容易的可以由范例"dependent data"的模拟程序,看出结果
##> b.star(x)
##    BstarSB BstarCB
##[1,]  50.39272  57.68526
##[2,] 251.62894 288.04323
# Two simulated series of length 100000 with unit-sd innovations.
# Both use a two-term AR part and an all-zero MA part; the second has the
# larger second AR coefficient.
x1 <- arima.sim(model = list(ar = c(0.5, 0), ma = c(0, 0)), n = 100000, sd = 1)
x2 <- arima.sim(model = list(ar = c(0.5, 0.4), ma = c(0, 0)), n = 100000, sd = 1) # more persistent

至于panel data抽样过程如下:
# Toy data for one individual: six consecutive years and two regressors,
# bound column-wise into a numeric matrix with columns year, x1 and x2.
year <- seq(1991, 1996, by = 1)
x1 <- seq(10, 60, by = 10)
x2 <- 10 * x1
x <- cbind(year, x1, x2)
x
#    year x1  x2
#[1,] 1991 10 100
#[2,] 1992 20 200
#[3,] 1993 30 300
#[4,] 1994 40 400
#[5,] 1995 50 500
#[6,] 1996 60 600

n <- 6 # number of rows of x that take part in the resampling
p <- 2 # number of columns of boot_bhat; presumably one per coefficient
b <- 1000 # Number of bootstrap samples
# Results holder with one row per bootstrap replication, filled in later.
boot_bhat <- matrix(NA, b, p)
block_length <- 3 # try block_length = 2 or block_length = 1 as well;
                  # it makes the sampling process easier to follow
# Non-overlapping blocks only tile the data when block_length divides n.
# Otherwise matrix() below would build an index matrix that drops or
# repeats rows, so fail early with a clear message instead.
if (n %% block_length != 0) {
  stop("block_length must divide n exactly", call. = FALSE)
}
num_blocks <- n / block_length # 2 blocks with the values above
# Column j holds the row indices of block j (matrix() fills column-wise).
Indices <- matrix(seq_len(n), nrow = block_length, ncol = num_blocks)
# First bootstrap draw
# Pick num_blocks block labels at random, with replacement.
randblock <- sample.int(num_blocks, size = num_blocks, replace = TRUE)
# Each column of Indices lists the rows of one block; flattening the chosen
# columns gives the resampled row order.
Ind_sim <- as.vector(Indices[, randblock])
Ind_sim # one possible outcome: [1] 1 2 3 1 2 3
# Take those rows of x, keeping every column.
xsim <- x[Ind_sim, seq_len(ncol(x))]
xsim # the newly drawn sample
#    year x1  x2
#[1,] 1991 10 100
#[2,] 1992 20 200
#[3,] 1993 30 300
#[4,] 1991 10 100
#[5,] 1992 20 200
#[6,] 1993 30 300
#回归
#将估计出的系数放在 matrix boot_bhat

# Second bootstrap draw
# Draw a fresh set of block labels, again with replacement.
randblock <- sample.int(num_blocks, size = num_blocks, replace = TRUE)
# Collect the row numbers stored in the selected columns of Indices as one
# plain vector.
Ind_sim <- as.vector(Indices[, randblock])
Ind_sim # one possible outcome: [1] 4 5 6 1 2 3
# Reorder the rows of x accordingly, keeping every column.
xsim <- x[Ind_sim, seq_len(ncol(x))]
xsim # the newly drawn sample
#    year x1  x2
#[1,] 1994 40 400
#[2,] 1995 50 500
#[3,] 1996 60 600
#[4,] 1991 10 100
#[5,] 1992 20 200
#[6,] 1993 30 300
#回归
#将估计出的系数放在 matrix boot_bhat
.......
#如此抽样回归一千次.

最后楼主可以采Jackknife-after-bootstrap method
The jackknife after bootstrap is used to estimate standard errors for
the bootstrap estimate of standard error, and the
influence of each observation on these estimates.
详细请参阅s-plus statman2.pdf page 515/576

扫码加我拉你入群

请注明：姓名-公司-职位

以便审核进群资格，未注明则拒绝

ywh19860616

2010-10-20 11:58:58

#epoh ，谢谢您一直以来的热心指导，非常感谢

我仔细运行了程序中每一句命令，发现对于抽样过程还是未很好掌握，以下以例子说明：
假如现我有2个地区，6年数据的：
# Two regions stacked one after the other, each observed in 1991-1996.
# x1 runs from 10 to 120 in steps of 10, and x2 equals 10 * x1 + 1.
year <- rep(seq(1991, 1996, by = 1), times = 2)
x1 <- seq(10, 120, by = 10)
x2 <- 10 * x1 + 1
x <- cbind(year, x1, x2)
#x
> x
   year  x1 x2
[1,] 1991  10  101
[2,] 1992  20  201
[3,] 1993  30  301
[4,] 1994  40  401
[5,] 1995  50  501
[6,] 1996  60  601
[7,] 1991  70  701
[8,] 1992  80  801
[9,] 1993  90  901
[10,] 1994 100 1001
[11,] 1995 110 1101
[12,] 1996 120 1201

#这种数据排列，按照R中对面板数据排列要求排列的。共有N=12个样本，其中n=2（个体），T=6（年份），N=n*T

#以下是您给出的，对应一个个体，6年的数据的抽样过程：
# NOTE(review): the x built earlier in this post has 12 rows (2 individuals
# x 6 years) while n is 6, so only rows 1-6 can ever be drawn. Confirm
# whether n should be the per-individual length or the full row count.
n <- 6 # number of rows of x that take part in the resampling
p <- 2 # number of columns of boot_bhat; presumably one per coefficient
b <- 1000 # Number of bootstrap samples
# Results holder with one row per bootstrap replication, filled in later.
boot_bhat <- matrix(NA, b, p)
block_length <- 3
# Non-overlapping blocks only tile the data when block_length divides n.
# Otherwise matrix() below would build an index matrix that drops or
# repeats rows, so fail early with a clear message instead.
if (n %% block_length != 0) {
  stop("block_length must divide n exactly", call. = FALSE)
}
num_blocks <- n / block_length # 2 blocks with the values above
# Column j holds the row indices of block j (matrix() fills column-wise).
Indices <- matrix(seq_len(n), nrow = block_length, ncol = num_blocks)
# First bootstrap draw
# Pick num_blocks block labels at random, with replacement.
randblock <- sample.int(num_blocks, size = num_blocks, replace = TRUE)
# Flatten the selected columns of Indices into the resampled row order.
Ind_sim <- as.vector(Indices[, randblock])
# Take those rows of x, keeping every column.
xsim <- x[Ind_sim, seq_len(ncol(x))]
xsim # the newly drawn sample
# Regression
# Store the estimated coefficients in the matrix boot_bhat

# Second bootstrap draw
# Draw a fresh set of block labels, again with replacement.
randblock <- sample.int(num_blocks, size = num_blocks, replace = TRUE)
# Collect the row numbers held in the selected columns of Indices.
Ind_sim <- as.vector(Indices[, randblock])
Ind_sim # one possible outcome: [1] 4 5 6 1 2 3
# Reorder the rows of x accordingly, keeping every column.
xsim <- x[Ind_sim, seq_len(ncol(x))]
xsim # the newly drawn sample
# Regression
# Store the estimated coefficients in the matrix boot_bhat

扫码加我拉你入群

请注明：姓名-公司-职位

以便审核进群资格，未注明则拒绝

ywh19860616

2010-10-21 11:16:21

#epoh，您能帮我看下，上面怎么理解吗

扫码加我拉你入群

请注明：姓名-公司-职位

以便审核进群资格，未注明则拒绝

epoh

2010-10-21 11:19:05

哈哈!刚发短信息答覆.

扫码加我拉你入群

请注明：姓名-公司-职位

以便审核进群资格，未注明则拒绝

ywh19860616

2010-10-21 11:24:20

#epoh，谢谢
针对这个数据
# Same two-region panel as before: years 1991-1996 repeated twice,
# x1 from 10 to 120 in steps of 10, and x2 equal to 10 * x1 + 1.
year <- rep(seq(1991, 1996, by = 1), times = 2)
x1 <- seq(10, 120, by = 10)
x2 <- 10 * x1 + 1
x <- cbind(year, x1, x2)

您给出的程序抽样过程，我不理解
23楼红色部分：
数据被分为1991,1992,1993,1994/////1995,1996,1991,1992/////1993,1994,1995,1996共3块。这样好像没有按年份分了？因为1995,1996,1991,1992分在了一块，这个怎么理解？

扫码加我拉你入群

请注明：姓名-公司-职位

以便审核进群资格，未注明则拒绝

epoh

2010-10-21 11:45:13

哈哈!答非所问
原来你内容有更改
晚点给你答覆.

扫码加我拉你入群

请注明：姓名-公司-职位

以便审核进群资格，未注明则拒绝

ywh19860616

2010-10-21 11:47:33

#epoh ，好的，谢谢您
那个问题我也不懂，并非答非所问，谢谢

扫码加我拉你入群

请注明：姓名-公司-职位

以便审核进群资格，未注明则拒绝

zhangtao

2010-10-21 16:15:50

非常非常精彩，对R有了更深刻的认识

扫码加我拉你入群

请注明：姓名-公司-职位

以便审核进群资格，未注明则拒绝

ywh19860616

2010-10-21 19:31:52

呵呵，都是在向epoh请教

扫码加我拉你入群

请注明：姓名-公司-职位

以便审核进群资格，未注明则拒绝

扫码加我 拉你入群

扫码加我 拉你入群

扫码加我 拉你入群

扫码加我 拉你入群

扫码加我 拉你入群

扫码加我 拉你入群

扫码加我 拉你入群

扫码加我 拉你入群

扫码加我 拉你入群

扫码加我 拉你入群

扫码加我 拉你入群

扫码加我 拉你入群

扫码加我 拉你入群

扫码加我 拉你入群

扫码加我 拉你入群

扫码加我 拉你入群

扫码加我 拉你入群

扫码加我 拉你入群

扫码加我 拉你入群

扫码加我 拉你入群

扫码加我 拉你入群

扫码加我 拉你入群

扫码加我 拉你入群

扫码加我 拉你入群

扫码加我 拉你入群

扫码加我 拉你入群

扫码加我 拉你入群

扫码加我 拉你入群

扫码加我 拉你入群

扫码加我 拉你入群

分享

扫码加好友，拉您进群

扫码加我拉你入群

扫码加我拉你入群

扫码加我拉你入群

扫码加我拉你入群

扫码加我拉你入群

扫码加我拉你入群

扫码加我拉你入群

扫码加我拉你入群

扫码加我拉你入群

扫码加我拉你入群

扫码加我拉你入群

扫码加我拉你入群

扫码加我拉你入群

扫码加我拉你入群

扫码加我拉你入群

扫码加我拉你入群

扫码加我拉你入群

扫码加我拉你入群

扫码加我拉你入群

扫码加我拉你入群

扫码加我拉你入群

扫码加我拉你入群

扫码加我拉你入群

扫码加我拉你入群

扫码加我拉你入群

扫码加我拉你入群

扫码加我拉你入群

扫码加我拉你入群

扫码加我拉你入群

扫码加我拉你入群