k-means clustering K平均アルゴリズム

3231 ワード

このアルゴリズムの主な役割は、画面上の多数の点について、互いに近い点どうしを最も近い中心点のもとに集めてグループ化することです。
k-means アルゴリズムはクラスタリングアルゴリズムの一つであり、n 個のオブジェクトをそれらの属性に基づいて k 個のグループに分割します(k < n)。PHP による実装コードは以下の通りです。
/**
 * One cluster of the k-means run: the points currently assigned to it and
 * the center those points are averaged into.
 */
class Cluster
{
  // Points currently assigned to this cluster; each must expose numeric ->x and ->y.
  // Defaults to an empty list so a fresh cluster can be counted safely.
  public $points = array();
  // Current center of the cluster. It must hold an object before
  // calculateAverage() is called, because the center is updated in place.
  public $avgPoint;

  /**
   * Recompute the center as the arithmetic mean of the assigned points.
   *
   * With no assigned points there is nothing to average, so the center is
   * moved to a random integer position between 0 and the given bounds.
   *
   * @param int|float $maxX bound for the randomized x coordinate (truncated to int)
   * @param int|float $maxY bound for the randomized y coordinate (truncated to int)
   * @return void
   */
  public function calculateAverage($maxX, $maxY)
  {
    // empty() also covers $points being null, which count() rejects on PHP 8.
    $count = empty($this->points) ? 0 : count($this->points);

    if ($count == 0)
    {
        //we didn't get any clues at all :( lets just randomize and hope for better...
        // rand() takes integers; cast explicitly instead of relying on implicit float conversion.
        $this->avgPoint->x = rand(0, (int) $maxX);
        $this->avgPoint->y = rand(0, (int) $maxY);
        return;
    }

    // The accumulators must start at zero; "+=" on an undefined variable raises a warning.
    $xsum = 0;
    $ysum = 0;
    foreach ($this->points as $p)
    {
        $xsum += $p->x;
        $ysum += $p->y;
    }

    $this->avgPoint->x = $xsum / $count;
    $this->avgPoint->y = $ysum / $count;
  }
}

/**
 * A point in the plane, described by its two coordinates.
 */
class Point
{
  // Horizontal coordinate.
  public $x;
  // Vertical coordinate.
  public $y;

  /**
   * Straight-line (Euclidean) distance from this point to another one.
   *
   * @param object $p any object exposing numeric ->x and ->y
   * @return float
   */
  public function getDistance($p)
  {
    $deltaX = $this->x - $p->x;
    $deltaY = $this->y - $p->y;

    // Sum of the squared coordinate differences, then its square root.
    $squaredLength = $deltaX * $deltaX + $deltaY * $deltaY;

    return sqrt($squaredLength);
  }
}

/**
 * Group points into $k clusters with the k-means iteration.
 *
 * Each pass assigns every point to the cluster whose center is nearest
 * (ties go to the earlier cluster) and then lets every cluster recompute
 * its center. Initial centers are random integer positions between 0 and
 * the largest x / y coordinate found in the input.
 *
 * @param int   $k          number of clusters to build; values below 1 yield an empty array
 * @param array $arr        Point objects (numeric ->x and ->y) to distribute
 * @param int   $iterations upper bound on assign/recompute passes (default 200)
 * @return array list of Cluster objects with ->points and ->avgPoint filled in
 */
function distributeOverClusters($k, $arr, $iterations = 200)
{
  $clusters = array();
  if ($k < 1)
  {
    // Without at least one cluster there is no center to compare against.
    return $clusters;
  }

  // Largest coordinates in the input. The first point seeds the maximum explicitly
  // instead of comparing against an undefined variable; stays 0 for empty input.
  $maxX = 0;
  $maxY = 0;
  $seeded = false;
  foreach ($arr as $p)
  {
    if (!$seeded || $p->x > $maxX)
      $maxX = $p->x;
    if (!$seeded || $p->y > $maxY)
      $maxY = $p->y;
    $seeded = true;
  }

  for ($i = 0; $i < $k; $i++)
  {
    $cluster = new Cluster();
    $center = new Point();
    // rand() takes integers; cast explicitly instead of relying on implicit float conversion.
    $center->x = rand(0, (int) $maxX);
    $center->y = rand(0, (int) $maxY);
    $cluster->avgPoint = $center;
    $cluster->points = array(); // always an array, even when no pass runs
    $clusters[] = $cluster;
  }
  $clusterCount = count($clusters);

  #deploy points to closest center.
  #recalculate centers
  $previousAssignment = null;
  for ($a = 0; $a < $iterations; $a++)
  {
    foreach ($clusters as $cluster)
      $cluster->points = array(); //reinitialize

    $assignment = array();
    foreach ($arr as $key => $pnt)
    {
      $best = 0;
      $bestdist = $clusters[0]->avgPoint->getDistance($pnt);

      for ($i = 1; $i < $clusterCount; $i++)
      {
        // Measure once per cluster and reuse the value for both compare and store.
        $dist = $clusters[$i]->avgPoint->getDistance($pnt);
        if ($dist < $bestdist)
        {
          $best = $i;
          $bestdist = $dist;
        }
      }

      $clusters[$best]->points[] = $pnt; //add the point to the best cluster.
      $assignment[$key] = $best;
    }

    //recalculate the centers.
    $hasEmptyCluster = false;
    foreach ($clusters as $cluster)
    {
      if (count($cluster->points) == 0)
        $hasEmptyCluster = true; // its center gets re-randomized, so the state is not stable yet
      $cluster->calculateAverage($maxX, $maxY);
    }

    // Same assignment as the previous pass and no re-randomized center: the centers
    // just computed equal the previous ones, so every further pass would repeat this one.
    if (!$hasEmptyCluster && $assignment === $previousAssignment)
      break;
    $previousAssignment = $assignment;
  }

  return $clusters;
}

// Demo run: three sample points on the horizontal line y = 2.
$p = new Point();
$p->y = 2;
$p->x = 2;

$p2 = new Point();
$p2->y = 2;
$p2->x = 3;

$p3 = new Point();
$p3->y = 2;
$p3->x = 8;

// Collect the points and print the two clusters they are distributed over.
$arr = array($p, $p2, $p3);
var_dump(distributeOverClusters(2, $arr));



参考:http://en.wikipedia.org/wiki/K-means_clustering