Skip to content
pddp_2means.c 32.2 KiB
Newer Older
/***********************************************************************************************
*cr                                       University of Patras, Greece
*cr                                   Copyright (c) 2015 University of Patras
*cr                                           All rights reserved
*cr
*cr                                           Developed by: HPClab 
*cr                               Computer Engineering and Informatics Department
*cr                                           University of Patras
*cr
***********************************************************************************************/

#include "pddp_2means.h"

/******************************************************************************/

#pragma offload_attribute (push, target(mic))

/******************************************************************************/

volatile unsigned long	leaves = 1, active_data_points;
unsigned long		num_of_cores;

/******************************************************************************/

void print_elapsed_time(struct timeval start, struct timeval end, char *msg)
{
	printf("%s: %f sec\n", msg, ((end.tv_sec - start.tv_sec) * 1000000.0 + (end.tv_usec - start.tv_usec)) / 1000000.0 );
	fflush(NULL);
}

/******************************************************************************/

Node *allocate_node(unsigned long data_points)
{
	Node	*temp;
	int	error;

	error = posix_memalign((void **)(&temp), 64, sizeof(Node));

	if (error != 0) {
		printf("ERROR: Could not allocate memory for tree node.\n");
		exit(0);
	}

	return(temp);
}

/******************************************************************************/

void init_node(Node *node, Node *sibling, double *M, unsigned long data_points, unsigned long *indices, double *centroid, unsigned long attributes, unsigned long padded_attributes, unsigned long threads)
{
	node->num_of_indices 	= data_points;
	node->indices		= indices;
	node->centroid		= centroid;
	node->leftchild		= NULL;
	node->rightchild	= NULL;
	node->sibling		= sibling;
	node->keep_in_tree	= 0;
	node->is_splittable  	= 1;
	node->scat		= calc_scatter(node, M, attributes, padded_attributes, threads);
}

/******************************************************************************/

void process_node(Node *node, double *M, unsigned long attributes, unsigned long padded_attributes, unsigned long current_level, unsigned long max_level)
{
	unsigned long	l_data_points, r_data_points, *l_indices, *r_indices, threads;
	double		*l_centroid, *r_centroid, *v;

	#if defined(_OPENMP)
	omp_set_nested(1);
	#endif

	threads = lround(((double)(node->num_of_indices * num_of_cores)) / ((double)active_data_points));
	if (threads < 1) {
		threads = 1;
	}

	/*
	 * Calculate leading principal component using the power iteration method.
	 */
	power_iteration(node, M, attributes, padded_attributes, &v, threads);

	/*
	 * Split cluster into 2 new clusters using the 2-means algorithm.
	 */
	vector_2_means(node, M, v, attributes, padded_attributes, &l_indices, &r_indices, &l_centroid, &r_centroid, &l_data_points, &r_data_points, threads);

	munmap(v, node->num_of_indices * sizeof(double));

	/*
	 * Check that both new clusters will contain enough data points.
	 */
	if ((l_data_points > 1) && (r_data_points > 1)) {

		node->leftchild  = allocate_node(l_data_points);
		node->rightchild = allocate_node(r_data_points);
		DBG("Left  node allocated is %p\n", node->leftchild);
		DBG("Right node allocated is %p\n", node->rightchild);

		init_node(node->leftchild, node->rightchild, M, l_data_points, l_indices, l_centroid, attributes, padded_attributes, threads);
		init_node(node->rightchild, node->leftchild, M, r_data_points, r_indices, r_centroid, attributes, padded_attributes, threads);
		DBG("Left  child has %ld indices\n", node->leftchild->num_of_indices);
		DBG("Right child has %ld indices\n", node->rightchild->num_of_indices);

		#pragma omp atomic
		leaves++;

		if (current_level < max_level - 1) {
			#pragma omp task
			process_node(node->leftchild,  M, attributes, padded_attributes, current_level + 1, max_level);

			#pragma omp task
			process_node(node->rightchild, M, attributes, padded_attributes, current_level + 1, max_level);
		} else {
			#pragma omp atomic
			active_data_points -= node->num_of_indices;
		}
	} else {
		#pragma omp atomic
		active_data_points -= node->num_of_indices;

		node->is_splittable = 0;

		if (l_indices != NULL) {
			munmap(l_indices, l_data_points * sizeof(unsigned long));
		}

		if (r_indices != NULL) {
			munmap(r_indices, r_data_points * sizeof(unsigned long));
		}
		free(l_centroid);
		free(r_centroid);
	}
}

/******************************************************************************/

double calc_scatter(Node *node, double *M, unsigned long attributes, unsigned long padded_attributes, unsigned long threads)
{
	unsigned long	i, j;
	double		scat = 0.0, *M_line;

	#pragma omp parallel for default(none) private(i, j, M_line) shared(node, M, attributes, padded_attributes) reduction(+:scat) num_threads(threads)
	for (i = 0; i < node->num_of_indices; i++) {
		M_line = &M[node->indices[i] * padded_attributes];
		#pragma vector aligned
		#pragma ivdep
		for (j = 0; j < attributes; j++) {
			scat += M_line[j] * M_line[j];
		}
	}

	DBG("Scatter value for node %p is %f\n", node, scat);

	return(scat);
}

/******************************************************************************/

void vector_2_means(Node *node, double *M, double *v, unsigned long attributes, unsigned long padded_attributes, unsigned long **l_indices, unsigned long **r_indices,
		    double **l_centroid, double **r_centroid, unsigned long *l_data_points, unsigned long *r_data_points, unsigned long threads)
{
	double		*c_l, *c_r, *M_line;
	double		prev_diff, diff = INFINITY;
	unsigned long	i, j, iter, l = 0, r = 0, error, *i_l, *i_r;
	char		*clusters;

	/*
	 * Allocate memory for centroids of the two clusters to be created.
	 */
	error = posix_memalign((void **)(&c_l), 64, attributes * sizeof(double));
	if (error != 0) {
		printf("ERROR: Could not allocate memory for vector c_l.\n");
		exit(0);
	}

	error = posix_memalign((void **)(&c_r), 64, attributes * sizeof(double));
	if (error != 0) {
		printf("ERROR: Could not allocate memory for vector c_r.\n");
		exit(0);
	}

	clusters = (char *)alloca(node->num_of_indices * sizeof(char));

	__assume_aligned(node, 64);
	__assume_aligned(v, 64);
	__assume_aligned(*l_indices, 64);
	__assume_aligned(*r_indices, 64);
	__assume_aligned(*l_centroid, 64);
	__assume_aligned(*r_centroid, 64);
	__assume_aligned(l_data_points, 64);
	__assume_aligned(r_data_points, 64);
//	__assume_aligned(clusters, 64);
	__assume_aligned(M_line, 64);
	__assume_aligned(c_l, 64);
	__assume_aligned(c_r, 64);
	__assume_aligned(i_l, 64);
	__assume_aligned(i_r, 64);

Loading full blame...