/*****************************************************************************/ /*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. */ /*By downloading, copying, installing or using the software you agree */ /*to this license. If you do not agree to this license, do not download, */ /*install, copy or use the software. */ /* */ /* */ /*Copyright (c) 2005 Northwestern University */ /*All rights reserved. */ /*Redistribution of the software in source and binary forms, */ /*with or without modification, is permitted provided that the */ /*following conditions are met: */ /* */ /*1 Redistributions of source code must retain the above copyright */ /* notice, this list of conditions and the following disclaimer. */ /* */ /*2 Redistributions in binary form must reproduce the above copyright */ /* notice, this list of conditions and the following disclaimer in the */ /* documentation and/or other materials provided with the distribution.*/ /* */ /*3 Neither the name of Northwestern University nor the names of its */ /* contributors may be used to endorse or promote products derived */ /* from this software without specific prior written permission. */ /* */ /*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */ /*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ /*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */ /*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */ /*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */ /*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ /*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */ /*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ /*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */ /*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */ /*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ /*POSSIBILITY OF SUCH DAMAGE. */ /******************************************************************************/ /*************************************************************************/ /** File: example.c **/ /** Description: Takes as input a file: **/ /** ascii file: containing 1 data point per line **/ /** binary file: first int is the number of objects **/ /** 2nd int is the no. of features of each **/ /** object **/ /** This example performs a fuzzy c-means clustering **/ /** on the data. Fuzzy clustering is performed using **/ /** min to max clusters and the clustering that gets **/ /** the best score according to a compactness and **/ /** separation criterion are returned. **/ /** Author: Wei-keng Liao **/ /** ECE Department Northwestern University **/ /** email: wkliao@ece.northwestern.edu **/ /** **/ /** Edited by: Jay Pisharath **/ /** Northwestern University. **/ /** **/ /** ================================================================ **/ /** **/ /** Edited by: Shuai Che, David Tarjan, Sang-Ha Lee **/ /** University of Virginia **/ /** **/ /** Description: No longer supports fuzzy c-means clustering; **/ /** only regular k-means clustering. **/ /** No longer performs "validity" function to analyze **/ /** compactness and separation crietria; instead **/ /** calculate root mean squared error. **/ /** **/ /*************************************************************************/ #define _CRT_SECURE_NO_DEPRECATE 1 #include #include #include #include #include #include #ifdef _OPENMP #include #endif #include "kmeans.h" extern double wtime(void); /*---< usage() >------------------------------------------------------------*/ void usage(char *argv0) { char *help = "\nUsage: %s [switches] -i filename\n\n" " -i filename :file containing data to be clustered\n" " -m max_nclusters :maximum number of clusters allowed [default=5]\n" " -n min_nclusters :minimum number of clusters allowed [default=5]\n" " -t threshold :threshold value [default=0.001]\n" " -l nloops :iteration for each number of clusters [default=1]\n" " -b :input file is in binary format\n" " -r :calculate RMSE [default=off]\n" " -o :output cluster center coordinates [default=off]\n"; fprintf(stderr, help, argv0); exit(-1); } /*---< main() >-------------------------------------------------------------*/ int setup(int argc, char **argv) { int opt; extern char *optarg; char *filename = 0; float *buf; char line[1024]; int isBinaryFile = 0; float threshold = 0.001; /* default value */ int max_nclusters=5; /* default value */ int min_nclusters=5; /* default value */ int best_nclusters = 0; int nfeatures = 0; int npoints = 0; float len; float **features; float **cluster_centres=NULL; int i, j, index; int nloops = 1; /* default value */ int isRMSE = 0; float rmse; int isOutput = 0; #ifdef _OPENMP float cluster_timing, io_timing; #endif /* obtain command line arguments and change appropriate options */ while ( (opt=getopt(argc,argv,"i:t:m:n:l:bro"))!= EOF) { switch (opt) { case 'i': filename=optarg; break; case 'b': isBinaryFile = 1; break; case 't': threshold=atof(optarg); break; case 'm': max_nclusters = atoi(optarg); break; case 'n': min_nclusters = atoi(optarg); break; case 'r': isRMSE = 1; break; case 'o': isOutput = 1; break; case 'l': nloops = atoi(optarg); break; case '?': usage(argv[0]); break; default: usage(argv[0]); break; } } if (filename == 0) usage(argv[0]); /* ============== I/O begin ==============*/ /* get nfeatures and npoints */ #ifdef _OPENMP io_timing = omp_get_wtime(); #endif if (isBinaryFile) { //Binary file input int infile; if ((infile = open(filename, O_RDONLY, "0600")) == -1) { fprintf(stderr, "Error: no such file (%s)\n", filename); exit(1); } read(infile, &npoints, sizeof(int)); read(infile, &nfeatures, sizeof(int)); /* allocate space for features[][] and read attributes of all objects */ buf = (float*) malloc(npoints*nfeatures*sizeof(float)); features = (float**)malloc(npoints* sizeof(float*)); features[0] = (float*) malloc(npoints*nfeatures*sizeof(float)); for (i=1; i npoints(%d) -- cannot proceed\n", min_nclusters, npoints); exit(0); } srand(7); /* seed for future random number generator */ memcpy(features[0], buf, npoints*nfeatures*sizeof(float)); /* now features holds 2-dimensional array of features */ free(buf); /* ======================= core of the clustering ===================*/ #ifdef _OPENMP cluster_timing = omp_get_wtime(); /* Total clustering time */ #endif cluster_centres = NULL; index = cluster(npoints, /* number of data points */ nfeatures, /* number of features for each point */ features, /* array: [npoints][nfeatures] */ min_nclusters, /* range of min to max number of clusters */ max_nclusters, threshold, /* loop termination factor */ &best_nclusters, /* return: number between min and max */ &cluster_centres, /* return: [best_nclusters][nfeatures] */ &rmse, /* Root Mean Squared Error */ isRMSE, /* calculate RMSE */ nloops); /* number of iteration for each number of clusters */ #ifdef _OPENMP cluster_timing = omp_get_wtime() - cluster_timing; #endif /* =============== Command Line Output =============== */ /* cluster center coordinates :displayed only for when k=1*/ if((min_nclusters == max_nclusters) && (isOutput == 1)) { printf("\n================= Centroid Coordinates =================\n"); for(i = 0; i < max_nclusters; i++){ printf("%d:", i); for(j = 0; j < nfeatures; j++){ printf(" %.2f", cluster_centres[i][j]); } printf("\n\n"); } } len = (float) ((max_nclusters - min_nclusters + 1)*nloops); printf("Number of Iteration: %d\n", nloops); #ifdef _OPENMP printf("Time for I/O: %.5fsec\n", io_timing); printf("Time for Entire Clustering: %.5fsec\n", cluster_timing); #endif if(min_nclusters != max_nclusters){ if(nloops != 1){ //range of k, multiple iteration #ifdef _OPENMP //printf("Average Clustering Time: %fsec\n", // cluster_timing / len); #endif printf("Best number of clusters is %d\n", best_nclusters); } else{ //range of k, single iteration //printf("Average Clustering Time: %fsec\n", // cluster_timing / len); printf("Best number of clusters is %d\n", best_nclusters); } } else{ if(nloops != 1){ // single k, multiple iteration #ifdef _OPENMP printf("Average Clustering Time: %.5fsec\n", cluster_timing / nloops); #endif if(isRMSE) // if calculated RMSE printf("Number of trials to approach the best RMSE of %.3f is %d\n", rmse, index + 1); } else{ // single k, single iteration if(isRMSE) // if calculated RMSE printf("Root Mean Squared Error: %.3f\n", rmse); } } /* free up memory */ free(features[0]); free(features); return(0); }