Newer
Older
/***********************************************************************************************
*cr University of Patras, Greece
*cr Copyright (c) 2015 University of Patras
*cr All rights reserved
*cr
*cr Developed by: HPClab
*cr Computer Engineering and Informatics Department
*cr University of Patras
*cr
***********************************************************************************************/
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
#include "pddp_2means.h"
/******************************************************************************/
#pragma offload_attribute (push, target(mic))
/******************************************************************************/
volatile unsigned long leaves = 1, active_data_points;
unsigned long num_of_cores;
/******************************************************************************/
void print_elapsed_time(struct timeval start, struct timeval end, char *msg)
{
printf("%s: %f sec\n", msg, ((end.tv_sec - start.tv_sec) * 1000000.0 + (end.tv_usec - start.tv_usec)) / 1000000.0 );
fflush(NULL);
}
/******************************************************************************/
Node *allocate_node(unsigned long data_points)
{
Node *temp;
int error;
error = posix_memalign((void **)(&temp), 64, sizeof(Node));
if (error != 0) {
printf("ERROR: Could not allocate memory for tree node.\n");
exit(0);
}
return(temp);
}
/******************************************************************************/
void init_node(Node *node, Node *sibling, double *M, unsigned long data_points, unsigned long *indices, double *centroid, unsigned long attributes, unsigned long padded_attributes, unsigned long threads)
{
node->num_of_indices = data_points;
node->indices = indices;
node->centroid = centroid;
node->leftchild = NULL;
node->rightchild = NULL;
node->sibling = sibling;
node->keep_in_tree = 0;
node->is_splittable = 1;
node->scat = calc_scatter(node, M, attributes, padded_attributes, threads);
}
/******************************************************************************/
void process_node(Node *node, double *M, unsigned long attributes, unsigned long padded_attributes, unsigned long current_level, unsigned long max_level)
{
unsigned long l_data_points, r_data_points, *l_indices, *r_indices, threads;
double *l_centroid, *r_centroid, *v;
#if defined(_OPENMP)
omp_set_nested(1);
#endif
threads = lround(((double)(node->num_of_indices * num_of_cores)) / ((double)active_data_points));
if (threads < 1) {
threads = 1;
}
/*
* Calculate leading principal component using the power iteration method.
*/
power_iteration(node, M, attributes, padded_attributes, &v, threads);
/*
* Split cluster into 2 new clusters using the 2-means algorithm.
*/
vector_2_means(node, M, v, attributes, padded_attributes, &l_indices, &r_indices, &l_centroid, &r_centroid, &l_data_points, &r_data_points, threads);
munmap(v, node->num_of_indices * sizeof(double));
/*
* Check that both new clusters will contain enough data points.
*/
if ((l_data_points > 1) && (r_data_points > 1)) {
node->leftchild = allocate_node(l_data_points);
node->rightchild = allocate_node(r_data_points);
DBG("Left node allocated is %p\n", node->leftchild);
DBG("Right node allocated is %p\n", node->rightchild);
init_node(node->leftchild, node->rightchild, M, l_data_points, l_indices, l_centroid, attributes, padded_attributes, threads);
init_node(node->rightchild, node->leftchild, M, r_data_points, r_indices, r_centroid, attributes, padded_attributes, threads);
DBG("Left child has %ld indices\n", node->leftchild->num_of_indices);
DBG("Right child has %ld indices\n", node->rightchild->num_of_indices);
#pragma omp atomic
leaves++;
if (current_level < max_level - 1) {
#pragma omp task
process_node(node->leftchild, M, attributes, padded_attributes, current_level + 1, max_level);
#pragma omp task
process_node(node->rightchild, M, attributes, padded_attributes, current_level + 1, max_level);
} else {
#pragma omp atomic
active_data_points -= node->num_of_indices;
}
} else {
#pragma omp atomic
active_data_points -= node->num_of_indices;
node->is_splittable = 0;
if (l_indices != NULL) {
munmap(l_indices, l_data_points * sizeof(unsigned long));
}
if (r_indices != NULL) {
munmap(r_indices, r_data_points * sizeof(unsigned long));
}
free(l_centroid);
free(r_centroid);
}
}
/******************************************************************************/
double calc_scatter(Node *node, double *M, unsigned long attributes, unsigned long padded_attributes, unsigned long threads)
{
unsigned long i, j;
double scat = 0.0, *M_line;
#pragma omp parallel for default(none) private(i, j, M_line) shared(node, M, attributes, padded_attributes) reduction(+:scat) num_threads(threads)
for (i = 0; i < node->num_of_indices; i++) {
M_line = &M[node->indices[i] * padded_attributes];
#pragma vector aligned
#pragma ivdep
for (j = 0; j < attributes; j++) {
scat += M_line[j] * M_line[j];
}
}
DBG("Scatter value for node %p is %f\n", node, scat);
return(scat);
}
/******************************************************************************/
void vector_2_means(Node *node, double *M, double *v, unsigned long attributes, unsigned long padded_attributes, unsigned long **l_indices, unsigned long **r_indices,
double **l_centroid, double **r_centroid, unsigned long *l_data_points, unsigned long *r_data_points, unsigned long threads)
{
double *c_l, *c_r, *M_line;
double prev_diff, diff = INFINITY;
unsigned long i, j, iter, l = 0, r = 0, error, *i_l, *i_r;
char *clusters;
/*
* Allocate memory for centroids of the two clusters to be created.
*/
error = posix_memalign((void **)(&c_l), 64, attributes * sizeof(double));
if (error != 0) {
printf("ERROR: Could not allocate memory for vector c_l.\n");
exit(0);
}
error = posix_memalign((void **)(&c_r), 64, attributes * sizeof(double));
if (error != 0) {
printf("ERROR: Could not allocate memory for vector c_r.\n");
exit(0);
}
clusters = (char *)alloca(node->num_of_indices * sizeof(char));
__assume_aligned(node, 64);
__assume_aligned(v, 64);
__assume_aligned(*l_indices, 64);
__assume_aligned(*r_indices, 64);
__assume_aligned(*l_centroid, 64);
__assume_aligned(*r_centroid, 64);
__assume_aligned(l_data_points, 64);
__assume_aligned(r_data_points, 64);
// __assume_aligned(clusters, 64);
__assume_aligned(M_line, 64);
__assume_aligned(c_l, 64);
__assume_aligned(c_r, 64);
__assume_aligned(i_l, 64);
__assume_aligned(i_r, 64);
Loading full blame...