Katama hashアルゴリズムのC#実装

6159 ワード

Katama hashは分散ソリューションでよく見られるアルゴリズムであり,このアルゴリズムや他のhash整合性アルゴリズムについてネット上で多くの記事で紹介されている.
この間ちょうど分布式システムを作る時にこのアルゴリズムを実現する必要があって、ネット上で探して、C#で実現するのはすべてあまりよくないことを発見しました.
検索結果が最も前に最も多かった実装があり,性能は最適化されておらず,コードの可読性もあまりよくない.
そして各C#のmemcached libraryの実装はまた結合がきついので、自分で次のコード(この友人の実装http://www.cnblogs.com/daizhj/archive/2010/08/24/1807324.htmlを参照)とBeitの実装をしました.

using System;
using System.Collections.Generic;
using System.Text;
using System.Security.Cryptography;

namespace Clover
{
    public sealed class KetamaHash
    {
        private int[] Values = null;
        private string[] Nodes = null;

        public KetamaHash(IEnumerable<string> nodes, int copyNodes = 10000)
        {
            Refresh(nodes, copyNodes);
        }

        /// <summary>
        ///           ，              ，                     
        /// </summary>
        /// <param name="nodes"></param>
        /// <param name="copyNodes"></param>
        public void Refresh(IEnumerable<string> nodes, int copyNodes = 10000)
        {
            if (nodes == null)
            {
                throw new ArgumentNullException("nodes");
            }
            if (copyNodes <= 0)
            {
                throw new ArgumentOutOfRangeException("virualNodes");
            }

            SortedList<int, string> dict = new SortedList<int, string>();
            HashSet<string> sortedNodes = new HashSet<string>();
            foreach (var item in nodes)
            {
                if (item != null)
                    sortedNodes.Add(item);
            }

            if ((sortedNodes.Count * copyNodes) > 320 * 1000)
            {
                throw new ArgumentOutOfRangeException("There is too many copyNodes or real nodes! nodes.Count multiply copyNodes must be not greater than 320*1000 ");
            }

            foreach (var node in sortedNodes)
            {
                for (int i = 0; i < copyNodes / 4; i++)
                {
                    byte[] digest = Hash(node + "_" + i);
                    for (int h = 0; h < 4; h++)
                    {
                        int m = BitConverter.ToInt32(digest, 0 * 4);
                        dict[m] = node;
                    }
                }
            }

            var newValues = new int[dict.Keys.Count];
            var newNodes = new string[dict.Keys.Count];
            dict.Keys.CopyTo(newValues, 0);
            dict.Values.CopyTo(newNodes, 0);

            Values = newValues; // thread not safty
            Nodes = newNodes; // thread not safty

        }

        public string GetNodeByKey(string key)
        {
            int value = BitConverter.ToInt32(Hash(key), 0); //first 4 byte to int32
            int result = Array.BinarySearch<int>(Values, value);
            if (result < 0)
                result = ~result;
            if (result >= Nodes.Length)
                return Nodes[Nodes.Length - 1];
            else
                return Nodes[result];
        }

        #region Private Supported Method
        private byte[] Hash(byte[] source)
        {
            HashAlgorithm helper = new MD5CryptoServiceProvider();
            return helper.ComputeHash(source);
        }
        private byte[] Hash(string s)
        {
            return Hash(Encoding.UTF8.GetBytes(s));
        }
        #endregion
    }
}

テストコードは次のとおりです.

  static void Main(string[] args)
        {
            List<string> nodes = new List<string>();
            for (int i = 0; i < 16; i++)
            {
                nodes.Add(Guid.NewGuid().ToString());//       。。。。    
            }
            KetamaHash target = new KetamaHash(nodes, 10000);

            Dictionary<string, int> dict = new Dictionary<string, int>();
            Stopwatch sw = new Stopwatch();
            sw.Start();
            for (int i = 0; i < 1000 * 1000; i++)//      
            {
                var result = target.GetNodeByKey(Guid.NewGuid().ToString());//       。。。。    
                if (result == null)
                {
                    throw new Exception("     ");
                }
                if (dict.ContainsKey(result))
                {
                    dict[result]++;
                }
                else
                {
                    dict[result] = 1;
                }
            }
            sw.Stop();

            long maxNumber = dict.Values.Max();
            long minNumber = dict.Values.Min();

            double temp = (maxNumber - minNumber) / Convert.ToDouble(maxNumber);

            Console.WriteLine(temp);
            Console.WriteLine(sw.ElapsedMilliseconds);

            if (temp >= 0.1)
            {
                Console.WriteLine("       ，             ");
            }
            if (sw.ElapsedMilliseconds >= 12 * 1000)
            {
                Console.WriteLine("    ....              。。。。  ～");
            }


        }

　　
仮想ノードが多ければ多いほど、データの割り当ては均一になりますが、性能も相対的に悪いので、こちらは10000を使うことをお勧めします.割り当ては比較的均一で、速度も遅くありません.
性能テストにより95%以上の性能がMD 5アルゴリズムに消費する、
MD 5のhashアルゴリズムを置き換えると性能がよくなります...

多項式求和式私が書いたコードはどうしてptaはいつも私の大きいデータの浮動小数点の間違いを言います

マルチスレッドおよびマルチスレッドにおける様々な問題