Pythonで正規分布データを生成し、グラフの描画と解析を行う方法を説明します。


1、正規分布データを生成し、確率分布図を作成する

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


# Normal probability density for the given mean and standard deviation.
def normfun(x, mu, sigma):
    """Evaluate the normal (Gaussian) probability density function.

    Args:
        x: Point or NumPy array of points at which to evaluate the density.
        mu: Mean of the distribution.
        sigma: Standard deviation of the distribution.

    Returns:
        The density at ``x``; same shape as ``x`` when ``x`` is an array.
    """
    exponent = -((x - mu)**2) / (2 * sigma**2)
    normalizer = sigma * np.sqrt(2 * np.pi)
    return np.exp(exponent) / normalizer


# Draw 100 samples from a normal distribution (mean 15, standard deviation 44).
# Alternative input: uniformly distributed integers in [-65, 80):
#   result = np.random.randint(-65, 80, size=100)
result = np.random.normal(loc=15, scale=44, size=100)
print(result)

# Evaluation grid spanning the observed sample range in steps of 0.1.
x = np.arange(result.min(), result.max(), 0.1)
# Density of a normal distribution fitted with the sample mean / std.
print(result.mean(), result.std())
y = normfun(x, result.mean(), result.std())
plt.plot(x, y)  # fitted density curve

# Histogram of the samples, normalised (density=True) so that it is
# comparable with the density curve; rwidth (0..1) is the relative bar width.
plt.hist(result, bins=10, rwidth=0.8, density=True)
plt.title('distribution')
plt.xlabel('temperature')
plt.ylabel('probability')
# Display the figure.
plt.show()

指定した範囲の整数乱数を生成します(`np.random.randint` は正規分布ではなく一様分布の乱数を返します)。

result = np.random.randint(-65, 80, size=100) # low (inclusive), high (exclusive), number of samples
平均値と標準偏差に基づいて正規分布の乱数を生成します。

result = np.random.normal(15, 44, 100) # mean 15, standard deviation 44, 100 samples
2、あるデータ列が正規分布に従っているかどうかを判定する

import numpy as np
from scipy import stats


pts = 1000
np.random.seed(28041990)  # fixed seed so that the run is reproducible
# Two samples of `pts` points each: one from N(0, 1) and one from N(2, 1).
a = np.random.normal(loc=0, scale=1, size=pts)
b = np.random.normal(loc=2, scale=1, size=pts)
# Join both samples into a single sequence; the mixture is what gets tested.
x = np.concatenate([a, b])
k2, p = stats.normaltest(x)
alpha = 1e-3  # significance level
print("p = {:g}".format(p))


# Null hypothesis: x comes from a normal distribution.
# It is rejected when the p-value falls below the significance level.
print(
    "The null hypothesis can be rejected"
    if p < alpha
    else "The null hypothesis cannot be rejected"
)
3、信頼区間、異常値を求める

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import pandas as pd


# Find outliers with the 1.5 * IQR (box-plot whisker) rule.
def get_outer_data(data_list):
    """Return the values of ``data_list`` that fall outside the IQR whiskers.

    A value counts as an outlier when it lies more than 1.5 times the
    interquartile range below the first quartile or above the third quartile.

    Args:
        data_list: One-dimensional sequence or array of numbers.

    Returns:
        A ``pandas.DataFrame`` with two columns: ``'id'`` (the index label of
        each outlier, i.e. its position for a plain sequence/array) and a
        second column holding the outlier values. The frame is empty when
        there are no outliers.
    """
    values = pd.DataFrame(data_list, columns=['value'])['value']

    # First and third quartiles and the interquartile range between them.
    lower_quartile = values.quantile(q=0.25)
    upper_quartile = values.quantile(q=0.75)
    iqr = upper_quartile - lower_quartile

    # Whiskers sit 1.5 * IQR beyond the quartiles.
    low_whisker = lower_quartile - 1.5 * iqr
    up_whisker = upper_quartile + 1.5 * iqr

    # Keep only the entries beyond either whisker.
    is_outlier = (values > up_whisker) | (values < low_whisker)
    outliers = values[is_outlier]
    return pd.DataFrame({'id': outliers.index, '   ': outliers})


N = 100
result = np.random.normal(0, 1, N)
# Alternative input: uniformly distributed integers in [-65, 80).
# result = np.random.randint(-65, 80, size=N)
mean = result.mean()
std = result.std(ddof=1)  # ddof=1 -> sample standard deviation

# Central 90% interval of a normal distribution parameterised with the
# sample mean and sample standard deviation.
conf_intveral = stats.norm.interval(0.9, loc=mean, scale=std)
print('    :', conf_intveral)

# Sample positions 0..len(result)-1, used as the horizontal axis.
x = np.arange(len(result))

# Outliers according to the 1.5 * IQR rule.
outer = get_outer_data(result)
print(outer, type(outer))
x1, y1 = outer.iloc[:, 0], outer.iloc[:, 1]
plt.scatter(x1, y1, marker='x', color='r')  # outliers as red crosses
plt.scatter(x, result, marker='.', color='g')  # every sample as a green dot
# Horizontal lines at the lower and upper interval bounds.
plt.plot([0, x.size], [conf_intveral[0]] * 2)
plt.plot([0, x.size], [conf_intveral[1]] * 2)
plt.show()

4、サンプル点の散布図と確率分布図

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import pandas as pd
import time


# Print the start time as "YYYY-MM-DD HH:MM:SS".
# The day-of-month directive is lowercase %d; uppercase %D is not a portable
# directive (glibc expands it to "mm/dd/yy", other platforms may reject it).
print(time.strftime('%Y-%m-%d %H:%M:%S'))


# Normal probability density for the given mean and standard deviation.
def _normfun(x, mu, sigma):
    """Evaluate the normal (Gaussian) probability density function.

    Args:
        x: Point or NumPy array of points at which to evaluate the density.
        mu: Mean of the distribution.
        sigma: Standard deviation of the distribution.

    Returns:
        The density at ``x``; same shape as ``x`` when ``x`` is an array.
    """
    exponent = -((x - mu)**2) / (2 * sigma**2)
    normalizer = sigma * np.sqrt(2 * np.pi)
    return np.exp(exponent) / normalizer


# Find outliers with the 1.5 * IQR (box-plot whisker) rule.
def get_outer_data(data_list):
    """Return the values of ``data_list`` that fall outside the IQR whiskers.

    A value counts as an outlier when it lies more than 1.5 times the
    interquartile range below the first quartile or above the third quartile.

    Args:
        data_list: One-dimensional sequence or array of numbers.

    Returns:
        A ``pandas.DataFrame`` with two columns: ``'id'`` (the index label of
        each outlier, i.e. its position for a plain sequence/array) and a
        second column holding the outlier values. The frame is empty when
        there are no outliers.
    """
    values = pd.DataFrame(data_list, columns=['value'])['value']

    # First and third quartiles and the interquartile range between them.
    lower_quartile = values.quantile(q=0.25)
    upper_quartile = values.quantile(q=0.75)
    iqr = upper_quartile - lower_quartile

    # Whiskers sit 1.5 * IQR beyond the quartiles.
    low_whisker = lower_quartile - 1.5 * iqr
    up_whisker = upper_quartile + 1.5 * iqr

    # Keep only the entries beyond either whisker.
    is_outlier = (values > up_whisker) | (values < low_whisker)
    outliers = values[is_outlier]
    return pd.DataFrame({'id': outliers.index, '   ': outliers})


N = 100
result = np.random.normal(0, 1, N)
# Alternative inputs for experimentation:
# result = np.random.randint(-65, 80, size=N)  # uniform integers in [-65, 80)
# result = np.array([100] * 100)  # constant data, i.e. std == 0
mean, std = result.mean(), result.std(ddof=1)  # sample mean / sample std

# Central 90% interval of a normal distribution with the sample mean / std.
# A zero standard deviation (all samples equal) does not describe a valid
# normal distribution, so fall back to a band of +/-1 around the data.
if std == 0:
    conf_intveral = [result.min() - 1, result.max() + 1]
else:
    conf_intveral = stats.norm.interval(0.9, loc=mean, scale=std)
# print('    :', conf_intveral)

# Outliers according to the 1.5 * IQR rule.
outer = get_outer_data(result)

# ---- Upper panel: scatter plot of the samples ----
fig = plt.figure()
fig.add_subplot(2, 1, 1)
plt.subplots_adjust(hspace=0.3)
x = np.arange(len(result))
plt.scatter(x, result, marker='.', color='g')  # every sample
plt.scatter(outer.iloc[:, 0], outer.iloc[:, 1], marker='x', color='r')  # outliers
# Horizontal lines at the lower and upper interval bounds, each labelled
# with its value at the left edge.
plt.plot([0, len(result)], [conf_intveral[0], conf_intveral[0]])
plt.plot([0, len(result)], [conf_intveral[1], conf_intveral[1]])
plt.text(0, conf_intveral[0], '{:.2f}'.format(conf_intveral[0]))
plt.text(0, conf_intveral[1], '{:.2f}'.format(conf_intveral[1]))
# Outlier count, placed at mid-height of the data range.
info = 'outer count:{}'.format(len(outer.iloc[:, 0]))
plt.text(x.min(), result.max() - ((result.max() - result.min()) / 2), info)
plt.xlabel('sample count')
plt.ylabel('value')

# ---- Lower panel: histogram with the fitted density, or a notice ----
fig.add_subplot(2, 1, 2)
if std != 0:
    x = np.arange(result.min(), result.max(), 0.1)
    # Use the same mean / std that are printed in the annotation so the
    # curve and the displayed numbers agree.
    y = _normfun(x, mean, std)
    plt.plot(x, y)  # fitted density curve
    # density=True normalises the histogram so it is comparable with the
    # curve; rwidth (0..1) is the relative bar width.
    plt.hist(result, bins=10, rwidth=0.8, density=True)
    # The third value is computed with np.median, so it is labelled as such.
    info = 'mean:{:.2f}\nstd:{:.2f}\nmedian:{:.2f}'.format(
        mean, std, np.median(result))
    plt.text(x.min(), y.max() / 2, info)
else:
    # Constant data: no density can be fitted, show the statistics only.
    info = 'non-normal distribution!!\nmean:{:.2f}\nstd:{:.2f}\nmedian:{:.2f}'.format(
        mean, std, np.median(result))
    plt.text(0.5, 0.5, info)
plt.xlabel('value')
plt.ylabel('Probability')

# Save before show(): the figure may no longer be available afterwards.
plt.savefig('./distribution.jpg')
plt.show()
# End timestamp; %d (not %D) is the day-of-month directive.
print(time.strftime('%Y-%m-%d %H:%M:%S'))

以上が、Pythonで正規分布データを生成し、グラフ描画と解析を行う方法の詳細です。Pythonの正規分布に関するその他の資料については、関連記事を参照してください。