# 2018-04-05 16:57:26 April Thursday, week 14, day 095 SZ SSMR
# Python machine-learning notes (scikit-learn on the iris dataset), covering:
# 1. Loading the iris dataset
# 2. Exploring and visualizing it with pandas
# 3. Linear regression and decision-tree classification
# 4. KMeans clustering
# Sklearn (scikit-learn) is a widely used Python machine-learning library.
# It provides tools for classification, regression, clustering and
# dimensionality reduction, and ships with small example datasets such as iris.
# Load the bundled iris sample dataset from scikit-learn.
from sklearn.datasets import load_iris

# The loader returns a Bunch with .data (features) and .target (labels).
iris = load_iris()
# Uncomment to inspect the raw feature matrix:
# print(iris.data)
`target` holds the class labels and `data` the feature matrix: 150 samples
split evenly across 3 classes (50 each). The three iris species are:
Iris Setosa
Iris Versicolour
Iris Virginica
2. Exploring the data
Besides the sklearn loader, the raw CSV can be read directly with pandas,
which makes summary statistics and plotting straightforward.
import pandas

# Pull the raw iris CSV straight from the UCI ML repository.
data_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
column_names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
dataset = pandas.read_csv(data_url, names=column_names)  # the file ships without a header row
# Per-column summary statistics (count, mean, std, quartiles, ...).
print(dataset.describe())
# Draw one histogram per numeric column.
dataset.hist()
`dataset.plot()` draws the columns as line plots by default; passing x and y
column names together with kind='scatter' produces a scatter plot instead.
import pandas

# Load the iris CSV from the UCI repository (no header row in the file).
iris_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
cols = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
dataset = pandas.read_csv(iris_url, names=cols)
print(dataset.describe())
# Scatter plot: sepal width against sepal length.
dataset.plot(x='sepal-length', y='sepal-width', kind='scatter')
`dataset.plot(kind='kde')` draws a KDE (Kernel Density Estimate) curve — a
smoothed density estimate — for each numeric column.
import pandas

# Read the UCI iris CSV; column names supplied because the file has no header.
src = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
headers = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
dataset = pandas.read_csv(src, names=headers)
print(dataset.describe())
# Kernel-density-estimate curve per numeric column.
dataset.plot(kind='kde')
Passing kind='box' to `dataset.plot()` draws box-and-whisker plots: each box
shows the median and quartiles of one column (values on the y axis), with
whiskers and outlier points beyond them.
import pandas

# Fetch the iris CSV from UCI; the file carries no header line.
csv_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
fields = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
dataset = pandas.read_csv(csv_url, names=fields)
print(dataset.describe())
# Density curves for every numeric column.
dataset.plot(kind='kde')
# One box plot per column, laid out on a 2x2 grid with independent axes.
dataset.plot(kind='box', subplots=True, layout=(2, 2),
             sharex=False, sharey=False)
pandas also provides radviz(), andrews_curves() and parallel_coordinates()
for visualizing all features at once, colored by class — e.g. petal-length
separates the three species clearly:
import pandas

# UCI iris CSV (no header row; supply the column names explicitly).
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
dataset = pandas.read_csv(url, names=names)

# FIX: pandas.tools.plotting was deprecated in pandas 0.20 and removed in
# pandas 1.0; the public location of these helpers is pandas.plotting.
from pandas.plotting import radviz, andrews_curves, parallel_coordinates

# Project the 4-D samples onto a 2-D circle, colored by class.
radviz(dataset, 'class')
# Each sample becomes a Fourier-series curve; same-class curves cluster.
andrews_curves(dataset, 'class')
# One vertical axis per feature; every sample is drawn as a poly-line.
parallel_coordinates(dataset, 'class')
Finally, a scatter matrix plots every pair of features against each other,
with a KDE curve for each feature on the diagonal.
import pandas

# UCI iris CSV (headerless; names supplied).
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
dataset = pandas.read_csv(url, names=names)

# FIX: pandas.tools.plotting was removed from pandas (0.20 deprecation,
# gone in 1.0); scatter_matrix now lives in pandas.plotting.
from pandas.plotting import scatter_matrix
# Pairwise scatter plots of all features, KDE curves on the diagonal.
scatter_matrix(dataset, alpha=0.2, figsize=(6, 6), diagonal='kde')
3. Linear regression
Use the first feature (sepal length) as x and the second (sepal width) as y.
from sklearn.datasets import load_iris

hua = load_iris()

# Pull out the first two feature columns: sepal length (x), sepal width (y).
x = [row[0] for row in hua.data]
y = [row[1] for row in hua.data]

import numpy as np  # sklearn expects 2-D column vectors, so reshape to (n, 1)
x = np.array(x).reshape(-1, 1)
y = np.array(y).reshape(-1, 1)
Fit a linear regression model with sklearn, then predict on the same inputs:
from sklearn.linear_model import LinearRegression

# Ordinary least squares: sepal width as a linear function of sepal length.
clf = LinearRegression().fit(x, y)
# In-sample predictions along the fitted line.
pre = clf.predict(x)
Visualize the fit with Matplotlib:
# Plot the observations, the fitted line, and each residual.
import matplotlib.pyplot as plt

plt.scatter(x, y, s=100)             # raw observations
plt.plot(x, pre, "r-", linewidth=4)  # fitted regression line
# Vertical green segment = residual between each point and the line.
for i, xi in enumerate(x):
    plt.plot([xi, xi], [y[i], pre[i]], 'g-')
plt.show()
The red line is the fitted regression; the green segments show how far each
observation lies from the line (the residuals).
Inspect the fitted slope, intercept and error:
print(u" ", clf.coef_)
print (u" ", clf.intercept_)
print (np.mean(y-pre)**2)
# [[-0.05726823]]
# [ 3.38863738]
# 1.91991214088e-31
Predicting for a sepal length of 5.0 gives a sepal width of about [3.10229621]:
print(clf.predict([[5.0]]))
4. Decision-tree classification
Train a decision tree classifier on the iris data:
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

# Fit a decision tree on the full iris data, then predict the same data
# back (resubstitution — illustrative only, not a real evaluation).
iris = load_iris()
clf = DecisionTreeClassifier()
clf.fit(iris.data, iris.target)
predicted = clf.predict(iris.data)

# First two feature columns for a 2-D scatter plot.
X = iris.data
L1 = [row[0] for row in X]
L2 = [row[1] for row in X]

import numpy as np
import matplotlib.pyplot as plt

# Color each point by its predicted class.
plt.scatter(L1, L2, c=predicted, marker='x')  # cmap=plt.cm.Paired
plt.title("DTC")
plt.show()
A sounder approach is to split the data into a training and a test set. Here
rows 0-40, 50-90 and 100-140 (40 per class) form the training set, and rows
40-50, 90-100 and 140-150 (10 per class) the test set; then train, predict
and evaluate:
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
import numpy as np

iris = load_iris()

# Manual split: 40 training + 10 test samples per class
# (the 3 classes are stored in contiguous blocks of 50 rows).
train_data = np.concatenate((iris.data[0:40, :], iris.data[50:90, :], iris.data[100:140, :]), axis=0)
train_target = np.concatenate((iris.target[0:40], iris.target[50:90], iris.target[100:140]), axis=0)
test_data = np.concatenate((iris.data[40:50, :], iris.data[90:100, :], iris.data[140:150, :]), axis=0)
test_target = np.concatenate((iris.target[40:50], iris.target[90:100], iris.target[140:150]), axis=0)

# Train on the training rows, predict the held-out test rows.
clf = DecisionTreeClassifier()
clf.fit(train_data, train_target)
predict_target = clf.predict(test_data)

# Number of correctly classified test samples (out of 30).
print(sum(predict_target == test_target))

# Per-class precision / recall / F1, plus the confusion matrix.
from sklearn import metrics
print(metrics.classification_report(test_target, predict_target))
print(metrics.confusion_matrix(test_target, predict_target))

# Plot the test set on the first two features, colored by prediction.
# (FIX: the original re-imported numpy here; it is already in scope.)
X = test_data
L1 = [row[0] for row in X]
L2 = [row[1] for row in X]
import matplotlib.pyplot as plt
plt.scatter(L1, L2, c=predict_target, marker='x')  # cmap=plt.cm.Paired
plt.title("DecisionTreeClassifier")
plt.show()
5. KMeans clustering
KMeans is unsupervised: it groups samples by similarity alone, without using
the class labels ("birds of a feather flock together"). Example:
# -*- coding: utf-8 -*-
from sklearn.datasets import load_iris
from sklearn.cluster import KMeans

iris = load_iris()

# FIX: KMeans() defaults to 8 clusters, but the iris data has exactly
# 3 species — request 3 clusters explicitly. Also, KMeans is
# unsupervised: fit() takes only the features (the original passed
# iris.target as well, which KMeans silently ignores).
clf = KMeans(n_clusters=3)
clf.fit(iris.data)
predicted = clf.predict(iris.data)

# First two feature columns for a 2-D scatter plot.
X = iris.data
L1 = [row[0] for row in X]
L2 = [row[1] for row in X]

import numpy as np
import matplotlib.pyplot as plt

# Color each point by its assigned cluster.
plt.scatter(L1, L2, c=predicted, marker='s', s=20, cmap=plt.cm.Paired)
# FIX: the original title "DTC" was copied from the decision-tree demo.
plt.title("KMeans")
plt.show()