Looking for a k-means algorithm written in C or C++

I need it urgently. If anyone has one, please help me out. Thanks!
Please don't just copy and paste; if you must, at least copy something from a domestic source. Yours is obviously from abroad, and that algorithm doesn't even run!

/****************************************************************************
 *                                                                          *
 *                                  KMEANS                                  *
 *                                                                          *
 ****************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>

// FUNCTION PROTOTYPES

// DEFINES
#define SUCCESS 1
#define FAILURE 0
#define TRUE 1
#define FALSE 0
#define MAXVECTDIM 20
#define MAXPATTERN 20
#define MAXCLUSTER 10

/* Format a double to 'width' decimal places and return a pointer to the
 * formatted text (static buffer, so each call overwrites the previous
 * result).  A leading space stands in for the sign of non-negative values.
 */
char *f2a(double x, int width){
static char cbuf[255];
snprintf(cbuf, sizeof(cbuf), "% .*f", width, x);
return cbuf;
}

// ***** Defined structures & classes *****
struct aCluster {
double Center[MAXVECTDIM];
int Member[MAXPATTERN]; //Index of Vectors belonging to this cluster
int NumMembers;
};

struct aVector {
double Center[MAXVECTDIM];
int Size;
};

class System {
private:
double Pattern[MAXPATTERN][MAXVECTDIM+1];
aCluster Cluster[MAXCLUSTER];
int NumPatterns; // Number of patterns
int SizeVector; // Number of dimensions in vector
int NumClusters; // Number of clusters
void DistributeSamples(); // Step 2 of K-means algorithm
int CalcNewClustCenters();// Step 3 of K-means algorithm
double EucNorm(int, int); // Calc Euclidean norm vector
int FindClosestCluster(int); //ret indx of clust closest to pattern
//whose index is arg
public:
System() {}
int LoadPatterns(char *fname); // Get pattern data to be clustered
void InitClusters(); // Step 1 of K-means algorithm
void RunKMeans(); // Overall control K-means process
void ShowClusters(); // Show results on screen
void SaveClusters(char *fname); // Save results to file
void ShowCenters();
};

void System::ShowCenters(){
int i;
printf("Cluster centers:\n");
for (i=0; i<NumClusters; i++) {
printf("ClusterCenter[%d]=(%f,%f)\n",i,Cluster[i].Center[0],Cluster[i].Center[1]);
} /* endfor */
printf("\n");
}

int System::LoadPatterns(char *fname){
FILE *InFilePtr;
int i,j;
double x;
if((InFilePtr = fopen(fname, "r")) == NULL)
return FAILURE;
fscanf(InFilePtr, "%d", &NumPatterns); // Read # of patterns
fscanf(InFilePtr, "%d", &SizeVector); // Read dimension of vector
fscanf(InFilePtr, "%d", &NumClusters); // Read # of clusters for K-Means
for (i=0; i<NumPatterns; i++) { // For each vector
for (j=0; j<SizeVector; j++) { // create a pattern
fscanf(InFilePtr,"%lg",&x); // consisting of all elements
Pattern[i][j]=x;
} /* endfor */
} /* endfor */
printf("Input patterns:\n");
for (i=0; i<NumPatterns; i++) {
printf("Pattern[%d]=(%2.3f,%2.3f)\n",i,Pattern[i][0],Pattern[i][1]);
} /* endfor */
printf("\n--------------------\n");
fclose(InFilePtr);
return SUCCESS;
}
//***************************************************************************
// InitClusters *
// Arbitrarily assign a vector to each of the K clusters *
// We choose the first K vectors to do this *
//***************************************************************************
void System::InitClusters(){
int i,j;
printf("Initial cluster centers:\n");
for (i=0; i<NumClusters; i++) {
Cluster[i].Member[0]=i;
for (j=0; j<SizeVector; j++) {
Cluster[i].Center[j]=Pattern[i][j];
} /* endfor */
} /* endfor */
for (i=0; i<NumClusters; i++) {
printf("ClusterCenter[%d]=(%f,%f)\n",i,Cluster[i].Center[0],Cluster[i].Center[1]);
} /* endfor */
printf("\n");
}

void System::RunKMeans(){
int converged;
int pass;
pass=1;
converged=FALSE;
while (converged==FALSE) {
printf("PASS=%d\n",pass++);
DistributeSamples();
converged=CalcNewClustCenters();
ShowCenters();
} /* endwhile */
}

double System::EucNorm(int p, int c){ // Calc Euclidean norm of vector difference
double dist,x; // between pattern vector, p, and cluster
int i; // center, c.
char zout[128];
strcpy(zout,"d=sqrt(");
printf("The distance from pattern %d to cluster %d is calculated as:\n",c,p);
dist=0;
for (i=0; i<SizeVector ;i++){
x=(Cluster[c].Center[i]-Pattern[p][i])*(Cluster[c].Center[i]-Pattern[p][i]);
strcat(zout,f2a(x,4));
if (i<SizeVector-1)
strcat(zout,"+");
dist += (Cluster[c].Center[i]-Pattern[p][i])*(Cluster[c].Center[i]-Pattern[p][i]);
} /* endfor */
printf("%s)\n",zout);
return dist;
}

int System::FindClosestCluster(int pat){
int i, ClustID;
double MinDist, d;
MinDist =9.9e+99;
ClustID=-1;
for (i=0; i<NumClusters; i++) {
d=EucNorm(pat,i);
printf("Distance from pattern %d to cluster %d is %f\n\n",pat,i,sqrt(d));
if (d<MinDist) {
MinDist=d;
ClustID=i;
} /* endif */
} /* endfor */
if (ClustID<0) {
printf("Aaargh");
exit(0);
} /* endif */
return ClustID;
}

void System::DistributeSamples(){
int i,pat,Clustid,MemberIndex;
//Clear membership list for all current clusters
for (i=0; i<NumClusters;i++){
Cluster[i].NumMembers=0;
}
for (pat=0; pat<NumPatterns; pat++) {
//Find cluster center to which the pattern is closest
Clustid= FindClosestCluster(pat);
printf("patern %d assigned to cluster %d\n\n",pat,Clustid);
//post this pattern to the cluster
MemberIndex=Cluster[Clustid].NumMembers;
Cluster[Clustid].Member[MemberIndex]=pat;
Cluster[Clustid].NumMembers++;
} /* endfor */
}

int System::CalcNewClustCenters(){
int ConvFlag,VectID,i,j,k;
double tmp[MAXVECTDIM];
char xs[255];
char ys[255];
char nc1[20];
char nc2[20];

ConvFlag=TRUE;
printf("The new cluster centers are now calculated as:\n");
for (i=0; i<NumClusters; i++) { //for each cluster
sprintf(nc1,"%d",Cluster[i].NumMembers);  /* itoa() is non-standard; sprintf() is portable */
sprintf(nc2,"%d",i);
strcpy(xs,"Cluster Center");
strcat(xs,nc2);
strcat(xs,"(1/");
strcpy(ys,"(1/");
strcat(xs,nc1);
strcat(ys,nc1);
strcat(xs,")(");
strcat(ys,")(");
for (j=0; j<SizeVector; j++) { // clear workspace
tmp[j]=0.0;
} /* endfor */
for (j=0; j<Cluster[i].NumMembers; j++) { //traverse member vectors
VectID=Cluster[i].Member[j];
for (k=0; k<SizeVector; k++) { //traverse elements of vector
tmp[k] += Pattern[VectID][k]; // add (member) pattern elmnt into temp
if (k==0) {
strcat(xs,f2a(Pattern[VectID][k],3));
} else {
strcat(ys,f2a(Pattern[VectID][k],3));
} /* endif */
} /* endfor */
if(j<Cluster[i].NumMembers-1){
strcat(xs,"+");
strcat(ys,"+");
}
else {
strcat(xs,")");
strcat(ys,")");
}
} /* endfor */
for (k=0; k<SizeVector; k++) { //traverse elements of vector
if (Cluster[i].NumMembers>0)  // guard against an empty cluster (avoids NaN centers)
tmp[k]=tmp[k]/Cluster[i].NumMembers;
if (tmp[k] != Cluster[i].Center[k])
ConvFlag=FALSE;
Cluster[i].Center[k]=tmp[k];
} /* endfor */
printf("%s,\n",xs);
printf("%s\n",ys);
} /* endfor */
return ConvFlag;
}

void System::ShowClusters(){
int cl;
for (cl=0; cl<NumClusters; cl++) {
printf("\nCLUSTER %d ==>[%f,%f]\n", cl,Cluster[cl].Center[0],Cluster[cl].Center[1]);
} /* endfor */
}

void System::SaveClusters(char *fname){
}

int main(int argc, char *argv[]) {
System kmeans;
if (argc<2) {
printf("USAGE: KMEANS PATTERN_FILE\n");
exit(0);
}
if (kmeans.LoadPatterns(argv[1])==FAILURE ){
printf("UNABLE TO READ PATTERN_FILE:%s\n",argv[1]);
exit(0);
}
kmeans.InitClusters();
kmeans.RunKMeans();
kmeans.ShowClusters();
return 0;
}
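For anyone trying to run the listing above: LoadPatterns() expects a plain-text file whose first three numbers are the pattern count, the vector dimension, and the cluster count, followed by the pattern values themselves. A minimal sample file (the values are made up purely for illustration; the demo's printf calls only display the first two coordinates, so 2-D data is easiest to test with):

6 2 2
1.0 1.0
1.5 2.0
3.0 4.0
5.0 7.0
3.5 5.0
4.5 5.0

Save that as, say, patterns.txt and run the program as KMEANS patterns.txt.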

Friendly reminder: answers are recommended by other users and are for reference only.
Answer 2, 2007-11-06
/*****
** kmeans.c
** - a simple k-means clustering routine
** - returns the cluster labels of the data points in an array
** - here's an example
** extern int *k_means(double**, int, int, int, double, double**);
** ...
** int *c = k_means(data_points, num_points, dim, 20, 1e-4, 0);
** for (i = 0; i < num_points; i++) {
** printf("data point %d is in cluster %d\n", i, c[i]);
** }
** ...
** free(c);
** Parameters
** - array of data points (double **data)
** - number of data points (int n)
** - dimension (int m)
** - desired number of clusters (int k)
** - error tolerance (double t)
** - used as the stopping criterion, i.e. when the sum of
** squared euclidean distance (standard error for k-means)
** of an iteration is within the tolerable range from that
** of the previous iteration, the clusters are considered
** "stable", and the function returns
** - a suggested value would be 0.0001
** - output address for the final centroids (double **centroids)
** - user must make sure the memory is properly allocated, or
** pass the null pointer if not interested in the centroids
** References
** - J. MacQueen, "Some methods for classification and analysis
** of multivariate observations", Fifth Berkeley Symposium on
** Math Statistics and Probability, 281-297, 1967.
** - I.S. Dhillon and D.S. Modha, "A data-clustering algorithm
** on distributed memory multiprocessors",
** Large-Scale Parallel Data Mining, 245-260, 1999.
** Notes
** - this function is provided as is with no warranty.
** - the author is not responsible for any damage caused
** either directly or indirectly by using this function.
** - anybody is free to do whatever he/she wants with this
** function as long as this header section is preserved.
** Created on 2005-04-12 by
** - Roger Zhang ([email protected])
** Modifications
** -
** Last compiled under Linux with gcc-3
*/

#include <stdlib.h>
#include <assert.h>
#include <float.h>
#include <math.h>

int *k_means(double **data, int n, int m, int k, double t, double **centroids)
{
/* output cluster label for each data point */
int *labels = (int*)calloc(n, sizeof(int));

int h, i, j; /* loop counters, of course :) */
int *counts = (int*)calloc(k, sizeof(int)); /* size of each cluster */
double old_error, error = DBL_MAX; /* sum of squared euclidean distance */
double **c = centroids ? centroids : (double**)calloc(k, sizeof(double*));
double **c1 = (double**)calloc(k, sizeof(double*)); /* temp centroids */

assert(data && k > 0 && k <= n && m > 0 && t >= 0); /* for debugging */

/****
** initialization */

for (h = i = 0; i < k; h += n / k, i++) {
c1[i] = (double*)calloc(m, sizeof(double));
if (!centroids) {
c[i] = (double*)calloc(m, sizeof(double));
}
/* pick k points as initial centroids */
for (j = m; j-- > 0; c[i][j] = data[h][j]);
}

/****
** main loop */

do {
/* save error from last step */
old_error = error, error = 0;

/* clear old counts and temp centroids */
for (i = 0; i < k; counts[i++] = 0) {
for (j = 0; j < m; c1[i][j++] = 0);
}

for (h = 0; h < n; h++) {
/* identify the closest cluster */
double min_distance = DBL_MAX;
for (i = 0; i < k; i++) {
double distance = 0;
for (j = m; j-- > 0; distance += pow(data[h][j] - c[i][j], 2));
if (distance < min_distance) {
labels[h] = i;
min_distance = distance;
}
}
/* update size and temp centroid of the destination cluster */
for (j = m; j-- > 0; c1[labels[h]][j] += data[h][j]);
counts[labels[h]]++;
/* update standard error */
error += min_distance;
}

for (i = 0; i < k; i++) { /* update all centroids */
for (j = 0; j < m; j++) {
c[i][j] = counts[i] ? c1[i][j] / counts[i] : c1[i][j];
}
}

} while (fabs(error - old_error) > t);

/****
** housekeeping */

for (i = 0; i < k; i++) {
if (!centroids) {
free(c[i]);
}
free(c1[i]);
}

if (!centroids) {
free(c);
}
free(c1);

free(counts);

return labels;
}
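A minimal driver for the routine above, following the usage shown in its header comment. The six 2-D points are made-up values purely for illustration, and passing 0 as the last argument means the caller does not want the centroids back:

#include <stdio.h>
#include <stdlib.h>

extern int *k_means(double **data, int n, int m, int k, double t, double **centroids);

int main(void)
{
    /* six 2-D points, hard-coded purely for illustration */
    double raw[6][2] = { {1.0, 1.0}, {1.5, 2.0}, {3.0, 4.0},
                         {5.0, 7.0}, {3.5, 5.0}, {4.5, 5.0} };
    double **data;
    int *labels;
    int i, n = 6, m = 2, k = 2;

    /* k_means() expects an array of row pointers */
    data = (double**)malloc(n * sizeof(double*));
    for (i = 0; i < n; i++)
        data[i] = raw[i];

    labels = k_means(data, n, m, k, 1e-4, 0);  /* 0: centroids not requested */
    for (i = 0; i < n; i++)
        printf("data point %d is in cluster %d\n", i, labels[i]);

    free(labels);
    free(data);
    return 0;
}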
Answer 3, 2007-11-26
Hello!

The posters above have all made it too complicated. Just use mine!

main()
int System::FindClosestCluster(int pat){
int i, ClustID;
double MinDist, d;
MinDist =9.9e+99;
ClustID=-1;
for (i=0; i<NumClusters; i++) {
d=EucNorm(pat,i);
printf("Distance from pattern %d to cluster %d is %f\n\n",pat,i,sqrt(d));
if (d<MinDist) {
MinDist=d;
ClustID=i;
} /* endif */
} /* endfor */
if (ClustID<0) {
printf("Aaargh");
exit(0);
} /* endif */
return ClustID;
}

Simple, clear, and easy for the system to execute.
This answer was accepted by the asker.
Answer 4, 2007-11-06
MATLAB program:
http://www.cs.tut.fi/sgn/m2obsi/ex/kmeans.m
More resources: http://people.revoledu.com/kardi/tutorial/kMean/Resources.htm
I've copied it over for you:
% kmeans.m : K-means clustering algorithm
% Copyright (c) 2002 - 2003
% Jussi Tohka
% Institute of Signal Processing
% Tampere University of Technology
% P.O. Box 553 FIN-33101
% Finland
% [email protected]
% -------------------------------
% Permission to use, copy, modify, and distribute this software
% for any purpose and without fee is hereby
% granted, provided that the above copyright notice appear in all
% copies. The author and Tampere University of Technology make no representations
% about the suitability of this software for any purpose. It is
% provided "as is" without express or implied warranty.
% *****************************************************************
% [c,costfunctionvalue, datalabels] = kmeans(data,k,c_init,max_iter)
% Input:
% data is the n x m matrix, where n is the number of data points
% and m is their dimensionality.
% k is the number of clusters
% c_init is the initializations for cluster centres. This must be a
% k x m matrix. (Optional, can be generated randomly).
% max_iter is the maximum number of iterations of the algorithm
% (Default 50).
% Output:
% c is the k x m matrix of final cluster centres.
% costfunctionvalue is the value of cost function after each
% iteration.
% datalabels is a n x 1 vector of labeling of data.

function [c,costfunctionvalue, datalabels,inter] = kmeans(data,k,varargin);

datasize = size(data);
n = datasize(1);
m = datasize(2);
if n < m
  fprintf(1,'Error: The number of datapoints must be greater than \n');
  fprintf(1,'their dimension. \n');
  return;
end
if length(varargin) > 0
  c_init = varargin{1};
else
  % First, select k random numbers from 1 to n WITHOUT repetition
  randomindex = zeros(k,1);
  for i = 1:k
    randomindex(i) = unidrnd(n + 1 - i);
    randomindex(i) = randomindex(i) + sum(randomindex(1:i-1) <= ...
                     randomindex(i));
  end
  c_init = data(randomindex,:);
end
if size(c_init) ~= [k m];
  fprintf(1,'Error: The size of c_init is incorrect.');
end
if length(varargin) > 1
  max_iter = varargin{2};
else
  max_iter = 50;
end
% Start the algorithm
iter = 0;
changes = 1;
distances = zeros(n,k);
costfunctionvalue = zeros(max_iter + 1,1);
c = c_init;
datalabels = zeros(n,1);
while iter < max_iter & changes
  iter = iter + 1;
  fprintf(1,'#');
  old_datalabels = datalabels;
  % Compute the distances between cluster centres and datapoints
  for i = 1:k
    dist(:,i) = sum((data - repmat(c(i,:),n,1)).^2,2);
  end
  % Label data points based on the nearest cluster centre
  [tmp,datalabels] = min(dist,[],2);
  % compute the cost function value
  costfunctionvalue(iter) = sum(tmp);
  % calculate the new cluster centres
  for i = 1:k
    c(i,:) = mean(data(find(datalabels == i),:));
  end
  % study whether the labels have changed
  changes = sum(old_datalabels ~= datalabels);
  inter(iter).datalabels = datalabels;
  inter(iter).c = c;
end
for i = 1:k
  dist(:,i) = sum((data - repmat(c(i,:),n,1)).^2,2);
end
[tmp,datalabels] = min(dist,[],2);
% compute the cost function value
costfunctionvalue(iter + 1) = sum(tmp);
fprintf(1,'\n');
Reference: http://people.revoledu.com/kardi/tutorial/kMean/Resources.htm