全部版块 我的主页
论坛 数据科学与人工智能 数据分析与数据科学 MATLAB等数学软件专版
1187 1
2020-06-13

Author: Daniel tliups liu


MATLAB C++ openMP mulit threads pararrel compute

dertransp.c

/* Inputs: prhs[0] = dA/dx corresponding to a matrix A of size (nrow x ncol)
 *         prhs[1] = nrow of matrix A
 * Outputs: plhs[0] = dA'/dx corresponding to transposed matrix A'
 * Method: This is just a row permutation of dA/dx
 *
 * by SeHyoun Ahn, Aug 2016
 */

#include "mex.h"
#include <stdlib.h>
#include <time.h>

void insertsort(mwIndex *irs, double *prs, mwSize n) {
    mwIndex i,j;
    mwIndex swapind;
    double swapval;
    for (i=1; i<n; ++i) {
        swapind = irs[i];
        swapval = prs[i];
        for (j=i; j>=0;--j) {
            if (j==0) {
                irs[j] = swapind;
                prs[j] = swapval;
                break;
            }
            else if (swapind<irs[j-1]) {
                irs[j] = irs[j-1];
                prs[j] = prs[j-1];
            }
            else {
                irs[j] = swapind;
                prs[j] = swapval;
                break;
            }
        }
    }
};

/* It uses in-place quicksort to get from unsorted CSC to sorted CSC format*/
void quicksort(mwIndex* irs, double* prs, mwSize n) {
    mwIndex front, back, pivot;
    mwIndex swapind;
    double swapval;
    pivot = rand()%n;
    front = rand()%n;
    back = rand()%n;
    if (irs[front]>irs[back]) {
        if (irs[pivot]>irs[front]) {
            pivot = irs[front];
            swapval = prs[front];
            irs[front] = irs[0];
            prs[front] = prs[0];
            irs[0] = pivot;
            prs[0] = swapval;
        }
        else if (irs[pivot]>irs[back]) {
            front = irs[pivot];
            swapval = prs[pivot];
            irs[pivot] = irs[0];
            prs[pivot] = prs[0];
            irs[0] = front;
            prs[0] = swapval;
            pivot = front;
        }
        else {
            pivot = irs[back];
            swapval = prs[back];
            irs[back] = irs[0];
            prs[back] = prs[0];
            irs[0] = pivot;
            prs[0] = swapval;
        }
    }
    else {
        if (irs[pivot]>irs[back]) {
            pivot = irs[back];
            swapval = prs[back];
            irs[back] = irs[0];
            prs[back] = prs[0];
            irs[0] = pivot;
            prs[0] = swapval;
        }
        else if (irs[pivot]>irs[front]) {
            back = irs[pivot];
            swapval = prs[pivot];
            irs[pivot] = irs[0];
            prs[pivot] = prs[0];
            irs[0] = back;
            prs[0] = swapval;
            pivot = back;
        }
        else {
            pivot = irs[front];
            swapval = prs[front];
            irs[front] = irs[0];
            prs[front] = prs[0];
            irs[0] = pivot;
            prs[0] = swapval;
        }
    }
    front = 1;
    back = n-1;

    while (front < back) {
        if (irs[front] < pivot) {
            ++front;
        }
        else if (irs[back] > pivot) {
            --back;
        }
        else {
            swapind = irs[back];
            swapval = prs[back];
            irs[back] = irs[front];
            prs[back] = prs[front];
            irs[front]= swapind;
            prs[front]= swapval;
            ++front;
        }
    }
    if (irs[front]<pivot) {
        swapind = irs[front];
        swapval = prs[front];
        irs[front] = irs[0];
        prs[front] = prs[0];
        irs[0] = swapind;
        prs[0] = swapval;
        if (front > 17)
            quicksort(&irs[0],&prs[0],front);
        else if (front > 1)
            insertsort(&irs[0],&prs[0],front);
        if ((n-1-front) > 17)
            quicksort(&irs[front+1],&prs[front+1],n-1-front);
        else if ((n-1-front) > 1)
            insertsort(&irs[front+1],&prs[front+1],n-1-front);
    }
    else {
        swapind = irs[front-1];
        swapval = prs[front-1];
        irs[front-1] = irs[0];
        prs[front-1] = prs[0];
        irs[0] = swapind;
        prs[0] = swapval;
        if (front-1 > 17) {
            quicksort(&irs[0],&prs[0],front-1);
        }
        else if (front-1 > 1) {
            insertsort(&irs[0],&prs[0],front-1);
        }
        if (n-front > 17) {
            quicksort(&irs[front],&prs[front],n-front);
        }
        else if (n-front > 1) {
            insertsort(&irs[front],&prs[front],n-front);
        }
    }
};

void mexFunction(int nlhs, mxArray *plhs[],int nrhs,const mxArray *prhs[])
{
    mwSize nrow, ncol, nderiv,nnz;
    mwIndex *irsA, *jcsA;
    double *prA;
    mwIndex *lirs, *ljcs;
    double *lpr;
    mwIndex i,j,tmp;

    srand(time(NULL));


    /* Read In Sparse Matrix */
    nderiv = mxGetN(prhs[0]);
    ncol = mxGetM(prhs[0]);
    irsA = mxGetIr(prhs[0]);
    jcsA = mxGetJc(prhs[0]);
    prA = mxGetPr(prhs[0]);
    nnz = jcsA[nderiv];

    /* Read in the number of rows in the Matrix A */
    nrow = mxGetScalar(prhs[1]);
    ncol = ncol/nrow;

    /* Prepare Output Matrix */
    plhs[0] = mxCreateSparse(nrow*ncol, nderiv, nnz, mxREAL);
    lirs = mxGetIr(plhs[0]);
    ljcs = mxGetJc(plhs[0]);
    lpr = mxGetPr(plhs[0]);

    ljcs[0] = 0;
    for (i=0; i<nderiv; ++i) {
        /* Compute the new row Index */
        for (j = jcsA[i]; j<jcsA[i+1]; ++j){
            lirs[j] = (irsA[j]%nrow)*ncol + irsA[j]/nrow;
            lpr[j] = prA[j];
        }
        ljcs[i+1] = jcsA[i+1];

        /* Sort to ensure sorted CSC format */
        if ( (ljcs[i+1] - ljcs[i]) > 17) {
            quicksort(&lirs[ljcs[i]], &lpr[ljcs[i]], ljcs[i+1]-ljcs[i]);
        }
        else if ( (ljcs[i+1] - ljcs[i]) > 1) {
            insertsort(&lirs[ljcs[i]], &lpr[ljcs[i]], ljcs[i+1]-ljcs[i]);
        }
    }
}

opemMP parallel

dertransp_parallel.c

/* Inputs: prhs[0] = dA/dx corresponding to a matrix A of size (nrow x ncol)
 *         prhs[1] = nrow of matrix A
 * Outputs: plhs[0] = dA'/dx corresponding to transposed matrix A'
 * Method: This is just a row permutation of dA/dx
 *
 * by SeHyoun Ahn, Aug 2016
 */

#include <omp.h>
#include "mex.h"
#include <stdlib.h>
#include <time.h>

void insertsort(mwIndex *irs, double *prs, mwSize n) {
  mwIndex i,j;
  mwIndex swapind;
  double swapval;
  for (i=1; i<n; ++i) {
    swapind = irs[i];
    swapval = prs[i];
    for (j=i; j>=0;--j) {
      if (j==0) {
        irs[j] = swapind;
        prs[j] = swapval;
      }
      else if (swapind<irs[j-1]) {
        irs[j] = irs[j-1];
        prs[j] = prs[j-1];
      }
      else {
        irs[j] = swapind;
        prs[j] = swapval;
        break;
      }
    }
  }
};

void quicksort(mwIndex* irs, double* prs, mwSize n) {
  mwIndex front, back, pivot;
  mwIndex swapind;
  double swapval;
  pivot = rand()%n;
  front = rand()%n;
  back = rand()%n;
  if (irs[front]>irs[back]) {
    if (irs[pivot]>irs[front]) {
      pivot = irs[front];
      irs[front] = irs[0];
      irs[0] = pivot;
    }
    else if (irs[pivot]>irs[back]) {
      front = irs[pivot];
      irs[pivot] = irs[0];
      irs[0] = front;
      pivot = front;
    }
    else {
      pivot = irs[back];
      irs[back] = irs[0];
      irs[0] = pivot;
    }
  }
  else {
    if (irs[pivot]>irs[back]) {
      pivot = irs[back];
      irs[back] = irs[0];
      irs[0] = pivot;
    }
    else if (irs[pivot]>irs[front]) {
      back = irs[pivot];
      irs[pivot] = irs[0];
      irs[0] = back;
      pivot = back;
    }
    else {
      pivot = irs[front];
      irs[front] = irs[0];
      irs[0] = pivot;
    }
  }
  front = 1;
  back = n-1;

  while (front < back) {
    if (irs[front] < pivot) {
      ++front;
    }
    else if (irs[back] > pivot) {
      --back;
    }
    else {
      swapind = irs[back];
      swapval = prs[back];
      irs[back] = irs[front];
      prs[back] = prs[front];
      irs[front]= swapind;
      prs[front]= swapval;
      ++front;
    }
  }
  if (irs[front]<pivot) {
    swapind = irs[front];
    swapval = prs[front];
    irs[front] = irs[0];
    prs[front] = prs[0];
    irs[0] = swapind;
    prs[0] = swapval;
    if (front > 17)
      quicksort(&irs[0],&prs[0],front);
    else if (front > 1)
      insertsort(&irs[0],&prs[0],front);
    if ((n-1-front) > 17)
      quicksort(&irs[front+1],&prs[front+1],n-1-front);
    else if ((n-1-front) > 1)
      insertsort(&irs[front+1],&prs[front+1],n-1-front);
  }
  else {
    swapind = irs[front-1];
    swapval = prs[front-1];
    irs[front-1] = irs[0];
    prs[front-1] = prs[0];
    irs[0] = swapind;
    prs[0] = swapval;
    if (front-1 > 17)
      quicksort(&irs[0],&prs[0],front-1);
    else if (front-1 > 1)
      insertsort(&irs[0],&prs[0],front-1);
    if (n-front > 17)
      quicksort(&irs[front],&prs[front],n-front);
    else if (n-front > 1)
      insertsort(&irs[front],&prs[front],n-front);
  }
};

void mexFunction(int nlhs, mxArray *plhs[],int nrhs,const mxArray *prhs[])
{
  srand(time(NULL));
  mwSize nrow, ncol, nderiv,nnz;

  /* Read In Sparse Matrix */
  mwIndex *irsA, *jcsA;
  double *prA;
  nderiv = mxGetN(prhs[0]);
  ncol   = mxGetM(prhs[0]);
  irsA   = mxGetIr(prhs[0]);
  jcsA   = mxGetJc(prhs[0]);
  prA    = mxGetPr(prhs[0]);
  nnz    = jcsA[nderiv];

  /* Read in the number of rows in the Matrix A */
  nrow   = mxGetScalar(prhs[1]);
  ncol   = ncol/nrow;

  /* Prepare Output Matrix */
  mwIndex *lirs, *ljcs;
  double *lpr;
  plhs[0] = mxCreateSparse(nrow*ncol,nderiv,nnz,mxREAL);

  lirs = mxGetIr(plhs[0]);
  ljcs = mxGetJc(plhs[0]);
  lpr = mxGetPr(plhs[0]);

  // lirs    = mxMalloc( nnz * sizeof(*lirs));
  // ljcs    = mxMalloc( (nderiv+1) * sizeof(*ljcs));
  // lpr     = mxMalloc( nnz * sizeof(*lpr));

  mwIndex i,j,tmp;
  ljcs[0]=0;

  #pragma omp parallel for default(shared) private(i,j) num_threads(6)
  for (i=0; i<nderiv; ++i) {
    /* Compute the new row Index */
    for (j = jcsA[i]; j<jcsA[i+1]; ++j){
      lirs[j] = (irsA[j]%nrow)*ncol + irsA[j]/nrow;
      lpr[j] = prA[j];
    }
    ljcs[i+1] = jcsA[i+1];
    /* Sort to ensure sorted CSC format */
    if ( (ljcs[i+1] - ljcs[i]) > 17)
      quicksort(&lirs[ljcs[i]], &lpr[ljcs[i]], ljcs[i+1]-ljcs[i]);
    else if ( (ljcs[i+1] - ljcs[i]) > 1)
      insertsort(&lirs[ljcs[i]], &lpr[ljcs[i]], ljcs[i+1]-ljcs[i]);
  }
  // plhs[0] = mxCreateSparse(nrow*ncol,nderiv,nnz,mxREAL);
  // if (nnz>0) {
  //   mxSetIr(plhs[0],lirs);
  //   mxSetJc(plhs[0],ljcs);
  //   mxSetPr(plhs[0],lpr);
  // }
}

detail

first of all,the include file is very important

/* Inputs: prhs[0] = dA/dx corresponding to a matrix A of size (nrow x ncol)
 *         prhs[1] = nrow of matrix A
 * Outputs: plhs[0] = dA'/dx corresponding to transposed matrix A'
 * Method: This is just a row permutation of dA/dx
 *
 * by SeHyoun Ahn, Aug 2016
 */

#include <omp.h>
#include "mex.h"
#include <stdlib.h>
#include <time.h>

second use the code line.

 #pragma omp parallel for default(shared) private(i,j) num_threads(6)

I use six threads

Enjoy

Daniel tulips liu. 刘旭东

补充部分

2020/6/13 晚上 20:48 再次修改

openMP 的 C语言文件头文件没加进去,新修改已经加进去。

#include <omp.h>

二维码

扫码加我 拉你入群

请注明:姓名-公司-职位

以便审核进群资格,未注明则拒绝

全部回复
2020-6-13 17:17:57
代码在上一贴。
[url]
https://bbs.pinggu.org/forum.php?mod=viewthread&tid=8559058
二维码

扫码加我 拉你入群

请注明:姓名-公司-职位

以便审核进群资格,未注明则拒绝

相关推荐
栏目导航
热门文章
推荐文章

说点什么

分享

扫码加好友,拉您进群
各岗位、行业、专业交流群