2020-06-13

BDgraph package: an R and C++ package with multi-threaded parallel computation.
Copyright © Reza Mohammadi [aut, cre] https://orcid.org/0000-0001-9538-0648,
Ernst Wit [aut] https://orcid.org/0000-0002-3671-9610,
Adrian Dobra [ctb]
Annotated for teaching by Daniel tulips liu,
Renmin University of China.


BDgraph/NAMESPACE


useDynLib( BDgraph, .registration = TRUE )

export( bdgraph,
		bdgraph.mpl,

		transfer,
		plinks,
		bf,
		precision,
		covariance,
		select,
		pgraph,
		graph.sim,
		adj2link,
		link2adj,
		bdgraph.sim,
		bdgraph.npn,
		compare,

		plotcoda,
		traceplot,
		plotroc,
		
		rgwish,
		rwish,
		gnorm,
		rmvnorm,
		
		summary.bdgraph,
		plot.bdgraph, 
		print.bdgraph, 

		plot.graph, 

		plot.sim,
		print.sim, 
		
		detect_cores,
		get_graph,
		get_g_prior,
		get_g_start,
		get_K_start,
		get_S_n_p,
		get_cores
	   )

S3method( "summary", "bdgraph" )
S3method( "plot"   , "bdgraph" )
S3method( "print"  , "bdgraph" )
S3method( "plot"   , "sim" )
S3method( "print"  , "sim" )
S3method( "plot"   , "graph" )

BDgraph/src/omp_set_num_cores.cpp


// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
//     Copyright (C) 2012 - 2019  Reza Mohammadi                                                   |
//                                                                                                 |
//     This file is part of BDgraph package.                                                       |
//                                                                                                 |
//     BDgraph is free software: you can redistribute it and/or modify it under                    |
//     the terms of the GNU General Public License as published by the Free                        |
//     Software Foundation; see <https://cran.r-project.org/web/licenses/GPL-3>.                   |
//                                                                                                 |
//     Maintainer: Reza Mohammadi <a.mohammadi@uva.nl>                                             |
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |

#include "util.h"

#ifdef _OPENMP
    #include <omp.h>
#endif

extern "C" {
	void omp_set_num_cores( int *cores ) 
	{
	    #ifdef _OPENMP
	        omp_set_num_threads( *cores );
	    #else
	        Rprintf( "  This OS does not support multi-threading for the BDgraph package  \n" ); 
	    #endif
	}
}
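
Because the routine is registered through useDynLib( BDgraph, .registration = TRUE ) in the NAMESPACE, the R side can reach it with .C(). A minimal sketch of such a call (the wrapper name set_bdgraph_cores is hypothetical, for illustration only; inside the package the exported get_cores helper likely plays this role):

# Sketch: request a given number of OpenMP threads from R.
# 'set_bdgraph_cores' is not a package function.
set_bdgraph_cores <- function( cores = 2 )
{
    .C( "omp_set_num_cores", as.integer( cores ), PACKAGE = "BDgraph" )
    invisible( cores )
}

On a build without OpenMP, the #else branch above simply prints the message instead of changing the thread count.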

BDgraph/src/gm_mpl_Hill_Climb.cpp


// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
//     Copyright (C) 2012 - 2019  Reza Mohammadi                                                   |
//                                                                                                 |
//     This file is part of BDgraph package.                                                       |
//                                                                                                 |
//     BDgraph is free software: you can redistribute it and/or modify it under                    |
//     the terms of the GNU General Public License as published by the Free                        |
//     Software Foundation; see <https://cran.r-project.org/web/licenses/GPL-3>.                   |
//                                                                                                 |
//     Maintainer: Reza Mohammadi <a.mohammadi@uva.nl>                                             |
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |

#include "matrix.h"

extern "C" {
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// Computing the Marginal pseudo-likelihood for HC algorithm and binary data
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
void log_mpl_binary_hc( int *node, int mb_node[], int *size_node, double *log_mpl_node, 
                  int data[], int freq_data[], int *length_freq_data, 
                  double *alpha_ijl, int *n )
{
	double alpha_jl      = 2 * *alpha_ijl;
	double log_alpha_ijl = lgammafn_sign( *alpha_ijl, NULL );
	double log_alpha_jl  = lgammafn_sign( alpha_jl, NULL );

	int i_hash, counter, i, j, l, size_mb_conf, mb_node_x_lf, node_x_lf = *node * *length_freq_data;
	double sum_lgamma_fam;

	*log_mpl_node = 0.0;
	int fam_conf_count_0 = 0, fam_conf_count_1 = 0;
		   
	switch( *size_node )
	{
		case 0:
			for( i = 0; i < *length_freq_data; i++ )
				( data[ node_x_lf + i ] == 0 ) ? fam_conf_count_0 += freq_data[ i ] : fam_conf_count_1 += freq_data[ i ];
				
			sum_lgamma_fam = lgammafn_sign( fam_conf_count_0 + *alpha_ijl, NULL ) + lgammafn_sign( fam_conf_count_1 + *alpha_ijl, NULL );
						   
			*log_mpl_node = sum_lgamma_fam - lgammafn_sign( *n + alpha_jl, NULL ) + log_alpha_jl - 2 * log_alpha_ijl;      
			break;
				
		case 1:
			mb_node_x_lf = mb_node[0] * *length_freq_data; 		

			for( l = 0; l < 2; l++ )  // collects the necessary statistics from the data and calculates the score
			{
				fam_conf_count_0 = 0;
				fam_conf_count_1 = 0;
				for( i = 0; i < *length_freq_data; i++ )
					if( data[ mb_node_x_lf + i ] == l ) 
						( data[ node_x_lf + i ] == 0 ) ? fam_conf_count_0 += freq_data[ i ] : fam_conf_count_1 += freq_data[ i ];
								
				sum_lgamma_fam = lgammafn_sign( fam_conf_count_0 + *alpha_ijl, NULL ) + lgammafn_sign( fam_conf_count_1 + *alpha_ijl, NULL );
		   
				//mb_conf_count = fam_conf_count[0] + fam_conf_count[1];
				*log_mpl_node += sum_lgamma_fam - lgammafn_sign( fam_conf_count_0 + fam_conf_count_1 + alpha_jl, NULL );     
			}		

			// adding remaining terms 
			*log_mpl_node += 2 * log_alpha_jl - 4 * log_alpha_ijl;   
			break;
	
		default:			
			int size_bit = sizeof( unsigned long long int ) * CHAR_BIT / 2;
			int sz = size_bit;
			
			vector<int>vec_fam_conf_count_0( *length_freq_data );	
			vector<int>vec_fam_conf_count_1( *length_freq_data );	

			vector<vector<unsigned long long int> > mb_conf( *length_freq_data );		
			vector<vector<unsigned long long int> > data_mb( *length_freq_data );		

			int size_hash = *size_node / sz + 1;
			vector<unsigned long long int> hash_mb( size_hash, 0 );			

			// for case i = 0
			for( j = 0; j < *size_node; j++ )
			{
				i_hash = j / sz;
				//hash_mb[ i_hash ] |= data[ mb_node[j] * *length_freq_data ] << ( j - i_hash * sz );
				hash_mb[ i_hash ] += (unsigned long long)data[ mb_node[ j ] * *length_freq_data ] << ( j - i_hash * sz );
			}
			mb_conf[0] = hash_mb;
			size_mb_conf = 1;

			if( data[ node_x_lf ] == 0 ) 
			{
				vec_fam_conf_count_0[0] = freq_data[0];
				vec_fam_conf_count_1[0] = 0;
			}else{
				vec_fam_conf_count_1[0] = freq_data[0];
				vec_fam_conf_count_0[0] = 0;
			}

			int sizeof_hash = size_hash * sizeof hash_mb[0];
			for( i = 1; i < *length_freq_data; i++ )
			{
				//vector<unsigned long long int> hash_mb( size_hash, 0 );
				memset( &hash_mb[0], 0, sizeof_hash );
				
				for( j = 0; j < *size_node; j++ )
				{
					i_hash = j / sz;
					//hash_mb[ i_hash ] |= data[ mb_node[j] * *length_freq_data + i ] << ( j - i_hash * sz );
					hash_mb[ i_hash ] += (unsigned long long)data[ mb_node[ j ] * *length_freq_data + i ] << ( j - i_hash * sz );
				}
				
				//data_mb[i] = hash_mb;
				counter = 1;
				for( j = 0; j < size_mb_conf; j++ )
					if( hash_mb == mb_conf[ j ] ) 
					{
						( data[ node_x_lf + i ] == 0 ) ? vec_fam_conf_count_0[ j ] += freq_data[ i ] : vec_fam_conf_count_1[ j ] += freq_data[ i ];
						counter = 0;  
						break;
					}					
				
				if( counter )
				{
					//( data[ node_x_lf + i ] == 0 ) ? vec_fam_conf_count_0[ size_mb_conf ] = freq_data[i] : vec_fam_conf_count_1[ size_mb_conf ] = freq_data[i];
					if( data[ node_x_lf + i ] == 0 ) 
					{
						vec_fam_conf_count_0[ size_mb_conf ] = freq_data[ i ];
						vec_fam_conf_count_1[ size_mb_conf ] = 0;
					}else{
						vec_fam_conf_count_1[ size_mb_conf ] = freq_data[ i ];
						vec_fam_conf_count_0[ size_mb_conf ] = 0;
					}
					
					mb_conf[ size_mb_conf++ ] = hash_mb;
				}
			}
						
			for( l = 0; l < size_mb_conf; l++ )  // collects the necessary statistics from the data and calculates the score
				*log_mpl_node += lgammafn_sign( vec_fam_conf_count_0[l] + *alpha_ijl, NULL ) + lgammafn_sign( vec_fam_conf_count_1[l] + *alpha_ijl, NULL ) - lgammafn_sign( vec_fam_conf_count_0[l] + vec_fam_conf_count_1[l] + alpha_jl, NULL );     

			// adding remaining terms 
			*log_mpl_node += size_mb_conf * ( log_alpha_jl - 2 * log_alpha_ijl );   

			break;
	}
}
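
The case-0 branch above is a closed-form Beta-Bernoulli marginal. A direct R transcription of that branch, as a sketch (x is a 0/1 vector, a plays the role of *alpha_ijl; the function name is chosen for illustration):

# With counts n0, n1 and hyperparameter a, the empty-Markov-blanket score is
#   lgamma(n0 + a) + lgamma(n1 + a) - lgamma(n + 2a) + lgamma(2a) - 2*lgamma(a)
log_mpl_binary_empty <- function( x, a = 0.5 )
{
    n0 <- sum( x == 0 )
    n1 <- sum( x == 1 )
    lgamma( n0 + a ) + lgamma( n1 + a ) - lgamma( n0 + n1 + 2 * a ) +
        lgamma( 2 * a ) - 2 * lgamma( a )
}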
    
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// Parallel function to compute the Marginal pseudo-likelihood for BINARY data
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
void log_mpl_binary_parallel_hc( int *node, int mb_node[], int *size_node, double *log_mpl_node, 
                  int data[], int freq_data[], int *length_freq_data, 
                  double *alpha_ijl, int *n )
{
	double alpha_jl      = 2 * *alpha_ijl;
	double log_alpha_ijl = lgammafn_sign( *alpha_ijl, NULL );
	double log_alpha_jl  = lgammafn_sign( alpha_jl, NULL );

	int i, l, size_mb_conf, mb_node_x_lf, node_x_lf = *node * *length_freq_data;
	double sum_lgamma_fam;

	*log_mpl_node = 0.0;
	int fam_conf_count_0 = 0, fam_conf_count_1 = 0;
		   
	switch( *size_node )
	{
		case 0:
			for( i = 0; i < *length_freq_data; i++ )
				( data[ node_x_lf + i ] == 0 ) ? fam_conf_count_0 += freq_data[ i ] : fam_conf_count_1 += freq_data[ i ];
				
			sum_lgamma_fam = lgammafn_sign( fam_conf_count_0 + *alpha_ijl, NULL ) + lgammafn_sign( fam_conf_count_1 + *alpha_ijl, NULL );
						   
			*log_mpl_node = sum_lgamma_fam - lgammafn_sign( *n + alpha_jl, NULL ) + log_alpha_jl - 2 * log_alpha_ijl;      
			break;
				
		case 1:
			mb_node_x_lf = mb_node[0] * *length_freq_data; 		

			for( l = 0; l < 2; l++ )  // collects the necessary statistics from the data and calculates the score
			{
				fam_conf_count_0 = 0;
				fam_conf_count_1 = 0;
				for( i = 0; i < *length_freq_data; i++ )
					if( data[ mb_node_x_lf + i ] == l ) 
						( data[ node_x_lf + i ] == 0 ) ? fam_conf_count_0 += freq_data[ i ] : fam_conf_count_1 += freq_data[ i ];
								
				sum_lgamma_fam = lgammafn_sign( fam_conf_count_0 + *alpha_ijl, NULL ) + lgammafn_sign( fam_conf_count_1 + *alpha_ijl, NULL );
		   
				//mb_conf_count = fam_conf_count[0] + fam_conf_count[1];
				*log_mpl_node += sum_lgamma_fam - lgammafn_sign( fam_conf_count_0 + fam_conf_count_1 + alpha_jl, NULL );     
			}		

			// adding remaining terms 
			*log_mpl_node += 2 * log_alpha_jl - 4 * log_alpha_ijl;   
			break;
	
		default:			
			vector<vector<unsigned long long int> > mb_conf( *length_freq_data );		
			vector<vector<unsigned long long int> > data_mb( *length_freq_data );		
			int size_bit = sizeof( unsigned long long int ) * CHAR_BIT / 2;

			int sz = size_bit;
			int size_hash = *size_node / sz + 1;			
	
			#pragma omp parallel
			{
				int j, i_hash;
				vector<unsigned long long int> hash_mb( size_hash );
				int sizeof_hash = size_hash * sizeof hash_mb[0];
				
				#pragma omp for
				for( int i = 0; i < *length_freq_data; i++ )
				{
					memset( &hash_mb[0], 0, sizeof_hash );
					
					for( j = 0; j < *size_node; j++ )
					{
						i_hash = j / sz;
						//hash_mb[ i_hash ] |= data[ mb_node[j] * *length_freq_data + i ] << ( j - i_hash * sz );
						hash_mb[ i_hash ] += (unsigned long long)data[ mb_node[ j ] * *length_freq_data + i ] << ( j - i_hash * sz );
					}
					data_mb[ i ] = hash_mb;
				}
			}
			
			mb_conf = data_mb;
			std::sort( mb_conf.begin(), mb_conf.end() );
			mb_conf.erase( std::unique( mb_conf.begin(), mb_conf.end() ), mb_conf.end() );
			size_mb_conf = mb_conf.size();

			for( l = 0; l < size_mb_conf; l++ )  // collects the necessary statistics from the data and calculates the score
			{
				fam_conf_count_0 = 0;
				fam_conf_count_1 = 0;
				for( i = 0; i < *length_freq_data; i++ )
					if( data_mb[ i ] == mb_conf[ l ] ) 
						( data[ node_x_lf + i ] == 0 ) ? fam_conf_count_0 += freq_data[ i ] : fam_conf_count_1 += freq_data[ i ];
				
				*log_mpl_node += lgammafn_sign( fam_conf_count_0 + *alpha_ijl, NULL ) + lgammafn_sign( fam_conf_count_1 + *alpha_ijl, NULL ) - lgammafn_sign( fam_conf_count_0 + fam_conf_count_1 + alpha_jl, NULL );     
			}

			// adding remaining terms 
			*log_mpl_node += size_mb_conf * ( log_alpha_jl - 2 * log_alpha_ijl );   

			break;
	}
}
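
The parallel variant first encodes each observation's Markov-blanket configuration into hash_mb inside the omp for loop, then finds the distinct configurations with sort + unique. The same deduplication step written in R, as a sketch with made-up codes:

# 'data_mb' holds one integer code per observation (cf. data_mb above).
data_mb      <- c( 3, 0, 3, 1, 0 )
mb_conf      <- sort( unique( data_mb ) )  # C++: std::sort, std::unique, erase
size_mb_conf <- length( mb_conf )          # here: 3 configurations (0, 1, 3)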
    	
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// Computing the Marginal pseudo-likelihood for discrete data
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
void log_mpl_hc_dis( int *node, int mb_node[], int *size_node, double *log_mpl_node, 
                  int data[], int freq_data[], int *length_freq_data, 
                  int max_range_nodes[], double *alpha_ijl, int *n )
{
	int i, j, l, size_mb_conf;
	int mb_node_x_lf, mb_conf_l, mb_conf_count;
	int node_x_lf = *node * *length_freq_data;
	double sum_lgamma_fam;

	vector<int>mb_conf( *length_freq_data );
	
    int max_range_node_j = max_range_nodes[ *node ];
	vector<int>fam_conf_count( max_range_node_j );
		   
    double alpha_jl = max_range_node_j * *alpha_ijl;   

	if( *size_node == 0 ) size_mb_conf = 1;
	
	vector<int>data_mb( *length_freq_data, 0 ); 
	
	if( *size_node == 1 ) 
	{
        // data_mb = data[ , mb_node ]          
		mb_node_x_lf = mb_node[0] * *length_freq_data; 
		//for( i = 0; i < *length_freq_data; i++ ) data_mb[i] = data[ mb_node_x_lf + i ]; 
		memcpy( &data_mb[0], &data[0] + mb_node_x_lf, sizeof( int ) * *length_freq_data );    
        
        size_mb_conf = max_range_nodes[ mb_node[0] ];            
        // mb_conf = 1:size_mb_conf;      
		for( j = 0; j < size_mb_conf; j++ ) mb_conf[ j ] = j;     
	}
	
	if( *size_node > 1 ) 
	{
		vector<int>cumprod_mb( *size_node );
		cumprod_mb[0] = max_range_nodes[ mb_node[ 0 ] ];
		//cumprod_mb   = t( t( c( 1, cumprod( max_range_nodes[ mb_node[ 2:length( mb_node ) ] ] ) ) ) )
		for( j = 1; j < *size_node; j++ )
			cumprod_mb[ j ] = cumprod_mb[ j - 1 ] * max_range_nodes[ mb_node[ j ] ];
			
        //data_mb = c( data[ , mb_node ] %*% cumprod_mb ) 
		for( i = 0; i < *length_freq_data; i++ )
			for( j = 0; j < *size_node; j++ )
				data_mb[ i ] += cumprod_mb[ j ] * data[ mb_node[ j ] * *length_freq_data + i ];
			
        //mb_conf      = unique( data_mb )            
		//vector<int>mb_conf( *n );
		//for( i = 0; i < *length_freq_data; i++ ) mb_conf[i] = data_mb[i];
		memcpy( &mb_conf[0], &data_mb[0], sizeof( int ) * *length_freq_data );

		std::sort( mb_conf.begin(), mb_conf.end() );
		mb_conf.erase( std::unique( mb_conf.begin(), mb_conf.end() ), mb_conf.end() );	
		
		size_mb_conf = mb_conf.size(); 
	}
	
	if( *size_node == 0 ) 
	{
		//for( i in 1:max_range_node_j ) fam_conf_count[i] = sum( ( data[ , node ] == i ) * ind )
		for( j = 0; j < max_range_node_j; j++ ) 
		{  
			 //fam_conf_count[j] = std::count( &data[0] + node_x_n, &data[0] + node_x_n + *n, j + 1 );
			fam_conf_count[ j ] = 0;
			for( i = 0; i < *length_freq_data; i++ )
				if( data[ node_x_lf + i ] == j ) fam_conf_count[ j ] += freq_data[ i ];
		}
			
		sum_lgamma_fam = 0.0;
		for( j = 0; j < max_range_node_j; j++ ) 
			sum_lgamma_fam += lgammafn( fam_conf_count[ j ] + *alpha_ijl );
					   
		*log_mpl_node = sum_lgamma_fam - lgammafn( *n + alpha_jl );      
	}

	if( *size_node > 0 ) 
	{
		*log_mpl_node = 0.0;
		for( l = 0; l < size_mb_conf; l++ )  // collects the necessary statistics from the data and calculates the score
		{
			//ind = c( ( data_mb == mb_conf[ l ] ) * 1 )   # finds positions of MB configuration 
			//mb_conf_count = sum( ind )  # n_jl
			mb_conf_l = mb_conf[ l ];
			//mb_conf_count = std::count( data_mb.begin(), data_mb.end(), mb_conf_l );
			mb_conf_count = 0;
			for( i = 0; i < *length_freq_data; i++ )
				if( data_mb[ i ] == mb_conf_l ) mb_conf_count += freq_data[ i ];
			
			//fam_conf_count = node_conf * 0
			//for( j in 1:max_range_node_j ) fam_conf_count[j] = sum( ( data[ , node ] == j ) * ind )
			for( j = 0; j < max_range_node_j; j++ )
			{
				fam_conf_count[ j ] = 0;
				for( i = 0; i < *length_freq_data; i++ )
					if( ( data[ node_x_lf + i ] == j ) && ( data_mb[ i ] == mb_conf_l ) ) fam_conf_count[ j ] += freq_data[ i ];
			}

			sum_lgamma_fam = 0.0;
			for( j = 0; j < max_range_node_j; j++ ) 
				sum_lgamma_fam += lgammafn( fam_conf_count[ j ] + *alpha_ijl );
	   
			*log_mpl_node += sum_lgamma_fam - lgammafn( mb_conf_count + alpha_jl );     
		}		
	}
	
	// adding remaining terms 
    *log_mpl_node += size_mb_conf * lgammafn( alpha_jl ) - size_mb_conf * max_range_node_j * lgammafn( *alpha_ijl );     
}
     
} // End of extern "C"
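
In log_mpl_hc_dis, the *size_node > 1 branch collapses each observation's Markov-blanket configuration into a single integer via cumulative-product weights (cumprod_mb). A sketch of that encoding in R, using the conventional mixed-radix weights c(1, cumprod(...)) hinted at in the code comments (the C loop above uses a closely related weighting; encode_configs is a name chosen for illustration):

# Encode joint parent configurations as single integers.
# Column j of 'dat' takes values in 0:(ranges[j] - 1).
encode_configs <- function( dat, ranges )
{
    weights <- c( 1, cumprod( ranges[ -length( ranges ) ] ) )
    as.vector( dat %*% weights )   # one code per row, all distinct
}

# Two ternary parents: 9 configurations, codes 0..8
dat <- as.matrix( expand.grid( 0:2, 0:2 ) )
encode_configs( dat, ranges = c( 3, 3 ) )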

BDgraph/R/graph.sim.R


## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
#     Copyright (C) 2012 - 2019  Reza Mohammadi                                |
#                                                                              |
#     This file is part of BDgraph package.                                    |
#                                                                              |
#     BDgraph is free software: you can redistribute it and/or modify it under |
#     the terms of the GNU General Public License as published by the Free     |
#     Software Foundation; see <https://cran.r-project.org/web/licenses/GPL-3>.|
#                                                                              |
#     Maintainer: Reza Mohammadi <a.mohammadi@uva.nl>                          |
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
#     Graph generator                                                          |
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |

graph.sim = function( p = 10, graph = "random", prob = 0.2, size = NULL, class = NULL, vis = FALSE )
{
    if( p < 2 ) stop( "'p' must be more than 1" )
    if( ( sum( prob < 0 ) + sum( prob > 1 ) ) != 0 ) stop( "'prob' must be between 0 and 1" )
 
    G <- matrix( 0, p, p )
    
    # - - build the graph structure - - - - - - - - - - - - - - - - - - - - -  |
    if( graph == "random" )
    {
        if( is.null( size ) )
        {
            G[ upper.tri( G ) ] <- stats::rbinom( p * ( p - 1 ) / 2, 1, prob )
        }else{
            if( size < 0 | size > p * ( p - 1 ) / 2 )  stop( "Graph size must be between zero and p*(p-1)/2" )
            
            smp <- sample( 1 : ( p * ( p - 1 ) / 2 ), size, replace = FALSE )
            G[ upper.tri( G ) ][smp] <- 1
        }
    }
    
    if( graph == "cluster" )
    {
        # partition variables
        if( is.null( class ) )
        { 
            #class = NULL
            if( !is.null( size ) )   class = length( size )
            if( length( prob ) > 1 ) class = length( prob )
            if( is.null( class ) )   class = max( 2, ceiling( p / 20 ) )
            #if( !is.null( size ) ) class <- length( size ) else class <- max( 2, ceiling( p / 20 ) )
        }
        
        g.large <- p %% class
        g.small <- class - g.large
        n.small <- floor( p / class )
        n.large <- n.small + 1
        vp      <- c( rep( n.small, g.small ), rep( n.large, g.large ) )
        
        if( is.null( size ) )
        {
            if( length( prob ) != class ) prob = rep( prob, class )
            
            for( i in 1 : class )
            {
                tmp <- if( i == 1 ) ( 1 : vp[ 1 ] ) else ( ( sum( vp[ 1 : ( i - 1 ) ] ) + 1 ) : sum( vp[ 1 : i ] ) )
                gg                <- matrix( 0, vp[ i ], vp[ i ] )
                gg[ upper.tri( gg ) ] <- stats::rbinom( vp[ i ] * ( vp[ i ] - 1 ) / 2, 1, prob[ i ] )
                G[ tmp, tmp ]       <- gg
            }
        }else{
            if( class != length( size ) )  stop( "Number of graph sizes does not match the number of clusters" )
            if( sum( size ) < 0 | sum( size ) > p * ( p - 1 ) / 2 ) stop( "Total graph size must be between zero and p*(p-1)/2" )
            
            for( i in 1 : class )
            {
                tmp <- if( i == 1 ) ( 1 : vp[1] ) else ( ( sum( vp[1 : (i-1)] ) + 1 ) : sum( vp[1:i] ) )
                gg  <- matrix( 0, vp[i], vp[i] )
                smp <- sample( 1 : ( vp[i] * (vp[i] - 1) / 2 ), size[i], replace = FALSE )
                gg[upper.tri(gg)][smp] <- 1
                G[tmp, tmp]            <- gg
            }
        }
    }
    
    if( graph == "scale-free" )
    {
        resultGraph = .C( "scale_free", G = as.integer(G), as.integer(p), PACKAGE = "BDgraph" )
        G = matrix( resultGraph $ G, p, p ) 
        
        #j = sample( 1:p, 1 )
        #for( i in ( c( 1:p )[ -j ] ) ) { G[ i, j ] = 1; G[ j, i ] = 1 }
    }
    
    if( ( graph == "lattice" ) | ( graph == "grid" ) )
    {
        if( is.null( size ) )
        {
            length_row = round( sqrt( p ) )
            length_col = round( sqrt( p ) )
        }else{
            if( length( size ) == 1 )
            {
                length_row = size
                length_col = size
            }else{
                length_row = size[ 1 ]
                length_col = size[ 2 ]
            } 
        }
        
        for( row in 1:length_row )
        {
            for( col in 1:length_col )
            {
                if( ( row != length_row ) & ( col != length_col ) )
                    G[ col + ( row - 1 ) * length_col, c( col + ( row - 1 ) * length_col + 1, col + row * length_col ) ] = 1
                if( ( row == length_row ) & ( col != length_col ) )
                    G[ col + ( row - 1 ) * length_col, col + ( row - 1 ) * length_col + 1 ] = 1
                if( ( row != length_row ) & ( col == length_col ) )
                    G[ col + ( row - 1 ) * length_col, col + row * length_col ] = 1
            }
        }
    }
    
    if( graph == "hub" )
    {
        if( is.null( size ) ) size = ceiling( p / 20 ) 

        hub = sample( 1:p, size = size, replace = FALSE )
        
        for( i in 1:size )
        {
            G[ hub[ i ],  ] <- 1
            G[ , hub[ i ] ] <- 1
        }
    }

    if( graph == "star" )
    {
        hub = sample( 1:p, size = 1, replace = FALSE )
        G[ hub,  ] <- 1
        G[ , hub ] <- 1
    }
    
    if( graph == "circle" )
    {
        if( p < 3 ) stop( "For 'circle' graph, 'p' must be more than 2" )
        
        G         <- stats::toeplitz( c( 0, 1, rep( 0, p - 2 ) ) )
        G[ 1, p ] <- 1
    }
    
    G[ lower.tri( G, diag = TRUE ) ] = 0
    G = G + t( G )

    # - - graph visualization - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -|
    if( vis == TRUE )
        BDgraph::plot.graph( G, main = "Graph structure" )
    
    class( G ) <- "graph"
    return( G )
}
   
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
# plot for class "graph" from graph.sim function
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
plot.graph = function( x, cut = 0.5, mode = "undirected", diag = FALSE, main = NULL, 
                       vertex.color = "white", vertex.label.color = 'black', ... )
{
    #if( is.null( main ) ) main = "Graph structure"
    #vertex.size = ifelse( p < 20, 15, 2 )
    
    graph = BDgraph::get_graph( x, cut = cut )
    
    graph_ig <- igraph::graph.adjacency( graph, mode = mode, diag = diag )
    
    igraph::plot.igraph( graph_ig, main = main, vertex.color = vertex.color, 
                         vertex.label.color = vertex.label.color, ... )
}		
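
Putting the two functions together, a quick usage sketch (igraph must be installed for the plotting step):

# Simulate a random undirected graph on 10 nodes and visualize it.
G <- graph.sim( p = 10, graph = "random", prob = 0.2 )
plot.graph( G, main = "Simulated random graph" )

# A cluster graph with 2 clusters of roughly equal size:
G2 <- graph.sim( p = 20, graph = "cluster", class = 2, prob = 0.3 )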

## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |

BDgraph/man/bdgraph.mpl.Rd

This is the source of the R help file (Rd format), which is similar to HTML.

\name{bdgraph.mpl}
\alias{bdgraph.mpl}
        
\title{	Search algorithm in graphical models using marginal pseudo-likelihood }
\description{
	This function consists of several sampling algorithms for Bayesian model determination in undirected graphical models based on marginal pseudo-likelihood. % based on birth-death MCMC method. 
	To speed up the computations, the birth-death MCMC sampling algorithms are implemented in parallel using \pkg{OpenMP} in \code{C++}.
}
\usage{
bdgraph.mpl( data, n = NULL, method = "ggm", transfer = TRUE, 
             algorithm = "bdmcmc", iter = 5000, burnin = iter / 2, 
             g.prior = 0.5, g.start = "empty", 
             jump = NULL, alpha = 0.5, save = FALSE, 
             print = 1000, cores = NULL, operator = "or" )
}

\arguments{
	\item{data}{
	There are two options: (1) an (\eqn{n \times p}) \code{matrix} or a \code{data.frame} corresponding to the data, 
	(2) a (\eqn{p \times p}) covariance matrix as \eqn{S=X'X}, in which \eqn{X} is the data matrix 
	(\eqn{n} is the sample size and \eqn{p} is the number of variables). 
	It can also be an object of class \code{"sim"}, from function \code{\link{bdgraph.sim}}.
	The input matrix is automatically identified by checking its symmetry.
	}

	\item{n}{The number of observations. It is needed if the \code{"data"} is a covariance matrix.}
	  
	\item{method}{
		A character with three options \code{"ggm"} (default), \code{"dgm"}, and \code{"dgm-binary"}. 
		Option \code{"ggm"} is for Gaussian graphical models based on the Gaussianity assumption.
		Option \code{"dgm"} is for discrete graphical models, for data that are discrete.
		Option \code{"dgm-binary"} is for discrete graphical models, for data that are binary.
	}

	\item{transfer}{ Only for discrete data, i.e. \code{method = "dgm"} or \code{method = "dgm-binary"}. }

	\item{algorithm}{
		A character with three options \code{"bdmcmc"} (default), \code{"rjmcmc"}, and \code{"hc"}. 
		Option \code{"bdmcmc"} is based on a birth-death MCMC algorithm.
		Option \code{"rjmcmc"} is based on a reversible-jump MCMC algorithm.
		Option \code{"hc"} is based on a hill-climbing algorithm; it is only for discrete data, i.e. \code{method = "dgm"} or \code{method = "dgm-binary"}.
	}
	
	\item{iter}{   The number of iterations for the sampling algorithm. }
	\item{burnin}{ The number of burn-in iterations for the sampling algorithm. }

	\item{g.prior}{
		Determines the prior distribution of each edge in the graph. 
		There are two options: a single value between \eqn{0} and \eqn{1} (e.g. \eqn{0.5} as a noninformative prior) 
		or a (\eqn{p \times p}) matrix with elements between \eqn{0} and \eqn{1}.
	}
	
	\item{g.start}{
		Corresponds to the starting point of the graph. It can be a (\eqn{p \times p}) matrix, \code{"empty"} (default), or \code{"full"}. 
		Option \code{"empty"} means the initial graph is an empty graph and \code{"full"} means a full graph. 
		It can also be an object of \code{S3} class \code{"bdgraph"} from \code{R} package \code{\link[BDgraph]{BDgraph}} or of class \code{"ssgraph"} from \code{R} package \code{\link[ssgraph]{ssgraph}}; 
		this option can be used to continue the sampling algorithm from the last object of a previous run (see examples).     
	} 

	\item{jump}{
		Only for the BDMCMC algorithm (\code{algorithm = "bdmcmc"}).
		It allows multiple links to be updated simultaneously when updating the graph in the BDMCMC algorithm.
	}	
	\item{alpha}{ Value of the hyperparameter of the Dirichlet prior distribution. }
	
	\item{save}{
		Logical: if FALSE (default), the adjacency matrices are NOT saved. 
		If TRUE, the adjacency matrices after burn-in are saved.
	}
	\item{print}{ Interval at which the iteration number of the MCMC algorithm is printed. }
	\item{cores}{ The number of cores to use for parallel execution. 
	   The option \code{cores = "all"} uses all CPU cores for parallel execution. 
	   %The default is to use "all" CPU cores of the computer.
	   }	
	\item{operator}{ A character with two options \code{"or"} (default) and \code{"and"}. It is used by the hill-climbing algorithm. }	   	
}

\value{
	An object of \code{S3} class \code{"bdgraph"} is returned:
	
	\item{p_links}{ An upper triangular matrix containing the estimated posterior probabilities of all possible links. }
	
	For the case \code{save = TRUE}, the following are also returned:

	\item{sample_graphs}{ A vector of strings which includes the adjacency matrices of visited graphs after burn-in.}
	\item{graph_weights}{ A vector which includes the waiting times of visited graphs after burn-in. }

	\item{all_graphs}{A vector which includes the identity of the adjacency matrices for all iterations after burn-in. 
		  It is needed for monitoring the convergence of the BD-MCMC algorithm.}

	\item{all_weights}{A vector which includes the waiting times for all iterations after burn-in. It is needed for monitoring the convergence of the BD-MCMC algorithm.}
}

\references{
Dobra, A. and Mohammadi, R. (2018). Loglinear Model Selection and Human Mobility, \emph{Annals of Applied Statistics}, 12(2):815-845

Mohammadi, A. and Wit, E. C. (2015). Bayesian Structure Learning in Sparse Gaussian Graphical Models, \emph{Bayesian Analysis}, 10(1):109-138

Mohammadi, A. and Dobra, A. (2017). The \code{R} Package \pkg{BDgraph} for Bayesian Structure Learning in Graphical Models, \emph{ISBA Bulletin}, 24(4):11-16

Pensar, J. et al. (2017). Marginal Pseudo-Likelihood Learning of Discrete Markov Network Structures, \emph{Bayesian Analysis}, 12(4):1195-1215

Mohammadi, R. and Wit, E. C. (2019). \pkg{BDgraph}: An \code{R} Package for Bayesian Structure Learning in Graphical Models, \emph{Journal of Statistical Software}, 89(3):1-30 
}

\author{ Reza Mohammadi \email{a.mohammadi@uva.nl}, Adrian Dobra, and Johan Pensar }

\seealso{ \code{\link{bdgraph}}, \code{\link{bdgraph.sim}}, \code{\link{summary.bdgraph}}, \code{\link{compare}} }

\examples{
# Generating multivariate normal data from a 'random' graph
data.sim <- bdgraph.sim( n = 70, p = 5, size = 7, vis = TRUE )
   
bdgraph.obj <- bdgraph.mpl( data = data.sim, iter = 500 )
  
summary( bdgraph.obj )
   
# To compare the result with true graph
compare( data.sim, bdgraph.obj, main = c( "Target", "BDgraph" ) )
}

\keyword{sampling algorithms}
\keyword{structure learning}
\keyword{iteration}

See you tomorrow.

Yours, Daniel tulips liu (刘旭东)
