全部版块 我的主页
论坛 数据科学与人工智能 数据分析与数据科学 R语言论坛
19224 58
2010-10-16
用R使用了如下命令:
> y <- rnorm(50)
> x <- matrix(rnorm(100),50)
> fit <- rq(y~x,tau = .4)
Call:
rq(formula = y ~ x, tau = 0.4)
Coefficients:
(Intercept)          x1          x2
-0.38203826  0.09384216  0.09347414
Degrees of freedom: 50 total; 47 residual
Call: rq(formula = y ~ x, tau = 0.4)
tau: [1] 0.4
Coefficients:
            Value    Std. Error t value  Pr(>|t|)
(Intercept) -0.38204  0.21700   -1.76056  0.08482
x1           0.09384  0.18203    0.51552  0.60861
x2           0.09347  0.24560    0.38059  0.70522

但是我现在想做的数据结构不是上面的一列数据格式(简称为截面),我现在想做面板(问一句,对于bootstrap,不同数据类型会不同吗,如面板数据和截面数据?),计算系数的程序已经
基本上有了,就是R中quantreg的rq.fit.sfn。但是这个程序只运行出coeff,而t值和p值都没有,所以我想用bootstrap计算(使用block bootstrap即可)
应该怎么实现?

> summary.rq
function (object, se = "nid", covariance = FALSE, hs = TRUE,
    ...)
{
    if (object$method == "lasso")
        stop("no inference for lasso'd rq fitting: try rqss (if brave)")
    mt <- terms(object)
    m <- model.frame(object)
    y <- model.response(m)
    x <- model.matrix(mt, m, contrasts = object$contrasts)
    wt <- model.weights(object$model)
    tau <- object$tau
    eps <- .Machine$double.eps^(2/3)
    coef <- coefficients(object)
    if (is.matrix(coef))
        coef <- coef[, 1]
    vnames <- dimnames(x)[[2]]
    resid <- object$residuals
    n <- length(resid)
    p <- length(coef)
    rdf <- n - p
    if (!is.null(wt)) {
        resid <- resid * wt
        x <- x * wt
        y <- y * wt
    }
    if (missing(se)) {
        if (n < 1001 & covariance == FALSE)
            se <- "rank"
        else se <- "nid"
    }
    if (se == "rank") {
        f <- rq.fit.br(x, y, tau = tau, ci = TRUE, ...)
    }
    if (se == "iid") {
        xxinv <- diag(p)
        xxinv <- backsolve(qr(x)$qr[1:p, 1:p, drop = FALSE],
            xxinv)
        xxinv <- xxinv %*% t(xxinv)
        pz <- sum(abs(resid) < eps)
        h <- max(p + 1, ceiling(n * bandwidth.rq(tau, n, hs = hs)))
        ir <- (pz + 1):(h + pz + 1)
        ord.resid <- sort(resid[order(abs(resid))][ir])
        xt <- ir/(n - p)
        sparsity <- rq(ord.resid ~ xt)$coef[2]
        cov <- sparsity^2 * xxinv * tau * (1 - tau)
        scale <- 1/sparsity
        serr <- sqrt(diag(cov))
    }
    else if (se == "nid") {
        h <- bandwidth.rq(tau, n, hs = hs)
        if (tau + h > 1)
            stop("tau + h > 1:  error in summary.rq")
        if (tau - h < 0)
            stop("tau - h < 0:  error in summary.rq")
        bhi <- rq.fit.fnb(x, y, tau = tau + h)$coef
        blo <- rq.fit.fnb(x, y, tau = tau - h)$coef
        dyhat <- x %*% (bhi - blo)
        if (any(dyhat <= 0))
            warning(paste(sum(dyhat <= 0), "non-positive fis"))
        f <- pmax(0, (2 * h)/(dyhat - eps))
        fxxinv <- diag(p)
        fxxinv <- backsolve(qr(sqrt(f) * x)$qr[1:p, 1:p, drop = FALSE],
            fxxinv)
        fxxinv <- fxxinv %*% t(fxxinv)
        cov <- tau * (1 - tau) * fxxinv %*% crossprod(x) %*%
            fxxinv
        scale <- mean(f)
        serr <- sqrt(diag(cov))
    }
    else if (se == "ker") {
        h <- bandwidth.rq(tau, n, hs = hs)
        if (tau + h > 1)
            stop("tau + h > 1:  error in summary.rq")
        if (tau - h < 0)
            stop("tau - h < 0:  error in summary.rq")
        uhat <- c(y - x %*% coef)
        h <- (qnorm(tau + h) - qnorm(tau - h)) * min(sqrt(var(uhat)),
            (quantile(uhat, 0.75) - quantile(uhat, 0.25))/1.34)
        f <- dnorm(uhat/h)/h
        fxxinv <- diag(p)
        fxxinv <- backsolve(qr(sqrt(f) * x)$qr[1:p, 1:p, drop = FALSE],
            fxxinv)
        fxxinv <- fxxinv %*% t(fxxinv)
        cov <- tau * (1 - tau) * fxxinv %*% crossprod(x) %*%
            fxxinv
        scale <- mean(f)
        serr <- sqrt(diag(cov))
    }
    else if (se == "boot") {
        B <- boot.rq(x, y, tau, ...)
        cov <- cov(B)
        serr <- sqrt(diag(cov))
    }
    if (se == "rank") {
        coef <- f$coef
    }
    else {
        coef <- array(coef, c(p, 4))
        dimnames(coef) <- list(vnames, c("Value", "Std. Error",
            "t value", "Pr(>|t|)"))
        coef[, 2] <- serr
        coef[, 3] <- coef[, 1]/coef[, 2]
        coef[, 4] <- if (rdf > 0)
            2 * (1 - pt(abs(coef[, 3]), rdf))
        else NA
    }
    object <- object[c("call", "terms")]
    if (covariance == TRUE) {
        object$cov <- cov
        if (se == "iid")
            object$scale <- scale
        if (se %in% c("nid", "ker")) {
            object$Hinv <- fxxinv
            object$J <- crossprod(x)
            object$scale <- scale
        }
        else if (se == "boot") {
            object$B <- B
        }
    }
    object$coefficients <- coef
    object$rdf <- rdf
    object$tau <- tau
    class(object) <- "summary.rq"
    object
}
二维码

扫码加我 拉你入群

请注明:姓名-公司-职位

以便审核进群资格,未注明则拒绝

全部回复
2010-10-17 11:33:45
Normal bootstrap works by creating data
by randomly selecting observations

Block bootstrap does this by randomly selecting
blocks of data, rather than individual data.

想了解一下,楼主是想保留data Block部分的当时特性?
如果是的话,等于是不用summary
重新编程,算出系数的standard errors.
二维码

扫码加我 拉你入群

请注明:姓名-公司-职位

以便审核进群资格,未注明则拒绝

2010-10-17 13:18:28
#epoh
嗯,我用了一个面板数据模型,因为我程序只运行出来系数,而没有给出对应的p值和t值。看了文献,文献提到使用block bootstrap抽样来计算standard errors,从而计算出对应的t值和p-value等拟合优度。我的目的就是这个。
我看了一些文献,说面板数据抽样和一般数据抽样存在不同,所以想问这应该怎么实现?
二维码

扫码加我 拉你入群

请注明:姓名-公司-职位

以便审核进群资格,未注明则拒绝

2010-10-17 16:32:06
library(quantreg)
n=1000     #n observations
y <- rnorm(n)
x <- matrix(rnorm(2*n),n)
fit <- rq(y~x,tau = .4)
fit
summary(fit,se = "boot", bsmethod= "xy")
#Coefficients:
#            Value    Std. Error t value  Pr(>|t|)
#(Intercept) -0.24258  0.03358   -7.22396  0.00000
#x1               0.05268  0.03514    1.49913  0.13416
#x2               0.03694  0.02999    1.23160  0.21839
######block bootstrap
b=1000     #Number of bootstrap samples
boot_bhat=matrix(NA,b, dim(x)[2]+1)
block_length = 50  
num_blocks = 20   #n/block_length
Indices = seq(1:n)  # All of the indices from 1 to n
Indices = matrix(Indices,block_length,num_blocks)
for (i in 1:b){          #Number of bootstrap samples
randblock =sample(seq(1:num_blocks),num_blocks,replace = TRUE) # Choose which blocks to use
Ind_sim = Indices[,randblock]       #Find which data are in each block  250 x 20
Ind_sim = c(Ind_sim)       #1000 x 1
Xsim = x[Ind_sim,1:2]      #Construct the x data
Ysim = y[Ind_sim]            #Construct the y data
boot_bhat[i,] = rq(Ysim~Xsim,tau=0.4)$coefficients
}
bhat=colMeans(boot_bhat)
bhat
#[1] -0.24659193  0.05058387  0.04053553
cov=cov(boot_bhat)
serr = sqrt(diag(cov))
serr   
# 0.02940081 0.03847271 0.03450373
二维码

扫码加我 拉你入群

请注明:姓名-公司-职位

以便审核进群资格,未注明则拒绝

2010-10-17 18:32:29
非常感谢#epoh,您写的很详细了,但是我初学R,不知道怎么转换成我想要的。
程序运行步骤是这样的,如下例:
m <- 3
n <- 10
s <- rep(1:n,rep(m,n))
x <- exp(rnorm(n*m))
X <- cbind(1,x)
u <- x*rnorm(m*n) + (1-x)*rf(m*n,3,3)
a <- rep(rnorm(n),rep(m,n))
y <- a + u
fit <- rq.fit.panel(X,y,s)
运行结果:
$coef
[1]  3.7116467 -2.6259010  3.8272535 -1.4451341  4.1059513 -0.9171851
[7] -3.8163941 -2.7247383 -3.2609396 -2.1087291 -1.8900982 -3.0358369
[13] -0.6968870 -0.1394642 -1.3196756 -2.5104425
$ierr  ##这是Error code for the internal Fortran routine srqfnc,不是标准误差
[1] 0
$it
[1] 10
$time
[1] 0
从上面运行结果可以看出,只有coef是我要的系数,但是其对应的t值和p-values未给出。所以想用
block bootstrap抽样得到。您上面程序我不知道如何套用。因为我运行时除了x,y,好有s(应该是用于识别面板结构的)。您能帮我再看看吗。
function(X,y,s,w=c(.25,.5,.25),taus=(1:3)/4,lambda = 1){

        require(SparseM)
        require(quantreg)
        K <- length(w)
        if(K != length(taus))
                stop("length of w and taus must match")
        X <- as.matrix(X)
        p <- ncol(X)
        n <- length(levels(as.factor(s)))
        N <- length(y)
        if(N != length(s) || N != nrow(X))
                stop("dimensions of y,X,s must match")
        Z <- as.matrix.csr(model.matrix(~as.factor(s)-1))
        Fidelity <- cbind(as(w,"matrix.diag.csr") %x% X,w %x% Z)
        Penalty <- cbind(as.matrix.csr(0,n,K*p),lambda*as(n,"matrix.diag.csr"))
        D <- rbind(Fidelity,Penalty)
        y <- c(w %x% y,rep(0,n))
        a <- c((w*(1-taus)) %x% (t(X)%*%rep(1,N)),
                sum(w*(1-taus)) * (t(Z) %*% rep(1,N)) + lambda * rep(1,n))
        rq.fit.sfn(D,y,rhs=a)
        }
function (a, y, tau = 0.5, rhs = (1 - tau) * c(t(a) %*% rep(1,
    length(y))), nsubmax, tmpmax, nnzlmax, cachsz = 64, small = 1e-06,
    maxiter = 100, warn.mesg = TRUE)
{
    y <- -y
    n <- length(y)
    m <- a@dimension[2]
    if (n != a@dimension[1])
        stop("Dimensions of design matrix and the response vector are not compatible")
    u <- rep(1, length = n)
    x <- rep((1 - tau), length = n)
    nnzdmax <- nnza <- a@ia[n + 1] - 1
    iwmax <- 7 * m + 3
    ao <- t(a)
    e <- ao %*% a
    nnzemax <- e@ia[m + 1] - 1
    if (missing(nsubmax))
        nsubmax <- nnzemax
    if (missing(nnzlmax))
        nnzlmax <- 4 * nnzdmax
    if (missing(tmpmax))
        tmpmax <- 6 * m
    wwm <- vector("numeric", 3 * m)
    s <- u - x
    b1 <- solve(e, ao %*% y, tmpmax = tmpmax, nnzlmax = nnzlmax,
        nsubmax = nsubmax)
    r <- y - a %*% b1
    z <- ifelse(abs(r) < small, (r * (r > 0) + small), r * (r >
        0))
    w <- z - r
    wwn <- matrix(0, n, 14)
    wwn[, 1] <- r
    wwn[, 2] <- z
    wwn[, 3] <- w
    srqfnb.o <- .Fortran("srqfn", n = as.integer(n), m = as.integer(m),
        nnza = as.integer(nnza), a = as.double(a@ra), ja = as.integer(a@ja),
        ia = as.integer(a@ia), ao = as.double(ao@ra), jao = as.integer(ao@ja),
        iao = as.integer(ao@ia), nnzdmax = as.integer(nnzdmax),
        d = double(nnzdmax), jd = integer(nnzdmax), id = integer(m +
            1), dsub = double(nnzemax + 1), jdsub = integer(nnzemax +
            1), nnzemax = as.integer(nnzemax), e = as.double(e@ra),
        je = as.integer(e@ja), ie = as.integer(e@ia), nsubmax = as.integer(nsubmax),
        lindx = integer(nsubmax), xlindx = integer(m + 1), nnzlmax = as.integer(nnzlmax),
        lnz = double(nnzlmax), xlnz = integer(m + 1), iw = integer(m *
            5), iwmax = as.integer(iwmax), iwork = integer(iwmax),
        xsuper = integer(m + 1), tmpmax = as.integer(tmpmax),
        tmpvec = double(tmpmax), wwm = as.double(wwm), wwn = as.double(wwn),
        cachsz = as.integer(cachsz), level = as.integer(8), x = as.double(x),
        s = as.double(s), u = as.double(u), c = as.double(y),
        sol = as.double(b1), rhs = as.double(rhs), small = as.double(small),
        ierr = integer(1), maxiter = as.integer(maxiter), time = double(7),
        PACKAGE = "quantreg")[c("sol", "ierr", "maxiter", "time")]
    ierr <- srqfnb.o$ierr
    if (!(ierr == 0) && warn.mesg)
        warning(sfnMessage(ierr))
    list(coef = -srqfnb.o$sol, ierr = ierr, it = srqfnb.o$maxiter,
        time = sum(srqfnb.o$time))
}
二维码

扫码加我 拉你入群

请注明:姓名-公司-职位

以便审核进群资格,未注明则拒绝

2010-10-17 20:25:01
library(quantreg)
m <- 3
n <- 10
s <- rep(1:n,rep(m,n))
x <- exp(rnorm(n*m))
X <- cbind(1,x)
u <- x*rnorm(m*n) + (1-x)*rf(m*n,3,3)
a <- rep(rnorm(n),rep(m,n))
y <- a + u
#########Sparse Regression Quantile Fitting
sX <- as.matrix.csr(X)
fit.sfn=rq.fit.sfn(sX, y)
fit.sfn
#$coef
#[1]  0.7230244 -1.5599576

######block bootstrap
n1=length(y)  #30
b=1000     #Number of bootstrap samples
boot_bhat=matrix(NA,b, dim(X)[2])
block_length = 10  
num_blocks = n1/block_length    #n/block_length  #3
Indices = seq(1:n1)  # All of the indices from 1 to n
Indices = matrix(Indices,block_length,num_blocks)
for (i in 1:b){          #Number of bootstrap samples
randblock =sample(seq(1:num_blocks),num_blocks,replace = TRUE) # Choose which blocks to use
Ind_sim = Indices[,randblock]       #Find which data are in each block  
Ind_sim = c(Ind_sim)      
Xsim = X[Ind_sim,1:2]      #Construct the x data
sXsim <- as.matrix.csr(Xsim)
Ysim = y[Ind_sim]          #Construct the y data
boot_bhat[i,] = rq.fit.sfn(sXsim, y)$coef
}
bhat=colMeans(boot_bhat)
#bhat
cov=cov(boot_bhat)
serr = sqrt(diag(cov))
#serr   

p <- length(bhat)
rdf <- n1 - p
vnames<- dimnames(x)[[2]]
coef <- array(bhat, c(p, 4))
dimnames(coef) <- list(vnames, c("Value", "Std. Error", "t value","Pr(>|t|)"))
coef[, 2] <- serr
coef[, 3] <- coef[, 1]/coef[, 2]
coef[, 4] <- if (rdf > 0) 2 * (1 - pt(abs(coef[, 3]), rdf))
coef

             Value          Std. Error       t value          Pr(>|t|)
#[1,]  0.03135153  0.7788350  0.04025439 0.9681760
#[2,] -0.52056648  0.8418555 -0.61835611 0.5413385

##############################
rq.fit.panel<-function(X,y,s,w=c(.25,.5,.25),taus=(1:3)/4,lambda = 1){
# prototype function for panel data fitting of QR models
# the matrix X is assumed to contain an intercept
# the vector s is a strata indicator assumed (so far) to be a one-way layout
# NB:  
# 1.  The value of the shrinkage parameter lambda is an open research problem in
#       the simplest homogneous settings it should be the ratio of the scale parameters
#       of the fixed effects and the idiocyncratic errors
# 2.  On return the coefficient vector has m*p + n elements where m is the number
#       quantiles being estimated, p is the number of colums of X, and n is the
#       number of distinct values of s.  The first m*p coefficients are the
#       slope estimates, and the last n are the "fixed effects"
# 3.  Like all shrinkage (regularization) estimators, asymptotic inference is somewhat
#       problematic... so the bootstrap is the natural first resort.


        require(SparseM)
        require(quantreg)
        K <- length(w)
        if(K != length(taus))
                stop("length of w and taus must match")
        X <- as.matrix(X)
        p <- ncol(X)
        n <- length(levels(as.factor(s)))
        N <- length(y)
        if(N != length(s) || N != nrow(X))
                stop("dimensions of y,X,s must match")
        Z <- as.matrix.csr(model.matrix(~as.factor(s)-1))
        Fidelity <- cbind(as(w,"matrix.diag.csr") %x% X,w %x% Z)
        Penalty <- cbind(as.matrix.csr(0,n,K*p),lambda*as(n,"matrix.diag.csr"))
        D <- rbind(Fidelity,Penalty)
        y <- c(w %x% y,rep(0,n))
        a <- c((w*(1-taus)) %x% (t(X)%*%rep(1,N)),
                sum(w*(1-taus)) * (t(Z) %*% rep(1,N)) + lambda * rep(1,n))
        rq.fit.sfn(D,y,rhs=a)
        }
#########rq.fit.panel
fit.panel <- rq.fit.panel(X,y,s)
fit.panel
#$coef
#[1]  3.5696496 -3.8264048  3.0765997 -2.0488377  2.9257768 -0.8172033 -3.6342146 -3.6971347
#[9] -0.1692880 -0.5130398 -2.1588006 -0.9896829 -3.6029453 -0.9873640 -1.0739194 -1.5147111

######block bootstrap
n1=length(y)  #30
b=1000     #Number of bootstrap samples
ncoef=16
boot_bhat=matrix(NA,b, ncoef)
block_length = 10  
num_blocks = n1/block_length    #n/block_length  
Indices = seq(1:n1)  # All of the indices from 1 to n
Indices = matrix(Indices,block_length,num_blocks)
for (i in 1:b){          #Number of bootstrap samples
randblock =sample(seq(1:num_blocks),num_blocks,replace = TRUE) # Choose which blocks to use
Ind_sim = Indices[,randblock]       #Find which data are in each block  
Ind_sim = c(Ind_sim)      
Xsim = X[Ind_sim,1:dim(X)[2]]      #Construct the x data
Ysim = y[Ind_sim]          #Construct the y data
boot_bhat[i,] = rq.fit.panel(Xsim,Ysim,s)$coef
}
bhat=colMeans(boot_bhat)
#bhat

cov=cov(boot_bhat)
serr = sqrt(diag(cov))
#serr   

p <- length(bhat)
rdf <- n1 - p
vnames<- dimnames(x)[[2]]
coef <- array(bhat, c(p, 4))
dimnames(coef) <- list(vnames, c("Value", "Std. Error", "t value","Pr(>|t|)"))
coef[, 2] <- serr
coef[, 3] <- coef[, 1]/coef[, 2]
coef[, 4] <- if (rdf > 0) 2 * (1 - pt(abs(coef[, 3]), rdf))
coef
> coef
           Value           Std. Error   t value        Pr(>|t|)
[1,]  3.5833974   1.224927  2.925397 0.011071952
[2,] -3.2408558   0.836343 -3.875032 0.001682470
[3,]  3.4498021   1.443538  2.389824 0.031476152
[4,] -1.7417926   1.514311 -1.150221 0.269321804
[5,]  3.2779020   1.430895  2.290806 0.038006777
[6,] -0.3387540   1.453237 -0.233103 0.819055078
[7,] -2.8649529   1.526798 -1.876445 0.081595934
[8,] -2.2847649   1.831747 -1.247315 0.232744869
[9,] -2.2825497   1.571369 -1.452587 0.168381907
[10,] -2.6825952   0.885694 -3.028806 0.009021254
[11,] -2.6366763   1.210942 -2.177375 0.047051236
[12,] -1.1954863   0.885605 -1.349909 0.198467848
[13,] -3.6453777   1.662293 -2.192982 0.045697269
[14,] -1.7934437   1.630096 -1.100208 0.289800173
[15,] -1.7644270   1.041941 -1.693405 0.112501121
[16,] -2.6937529   1.733657 -1.553798 0.142542035
二维码

扫码加我 拉你入群

请注明:姓名-公司-职位

以便审核进群资格,未注明则拒绝

点击查看更多内容…
相关推荐
栏目导航
热门文章
推荐文章

说点什么

分享

扫码加好友,拉您进群
各岗位、行业、专业交流群