使用随机森林对鸢尾花进行分类

导入所需的包

import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier

# Preprocess the species column: the three iris species are encoded as 0, 1, 2.

def iris_type(s):
    """Map an iris species label to its integer class index.

    Intended as a ``converters`` callback for ``np.loadtxt``. Depending on
    the NumPy version and the ``encoding`` argument, ``loadtxt`` hands the
    converter either ``bytes`` or ``str``, so both are accepted.

    Args:
        s: Species label, e.g. ``b'Iris-setosa'`` or ``'Iris-setosa'``.

    Returns:
        0 for Iris-setosa, 1 for Iris-versicolor, 2 for Iris-virginica.

    Raises:
        KeyError: If the label is not one of the three known species.
    """
    # latin1 decoding cannot fail, so an unknown label always surfaces as
    # KeyError rather than as a decoding error.
    label = s.decode('latin1') if isinstance(s, bytes) else s
    it = {
        'Iris-setosa': 0,
        'Iris-versicolor': 1,
        'Iris-virginica': 2,
    }
    return it[label]

建立随机森林学习模型

对“花萼长度”、“花萼宽度”、“花瓣长度”、“花瓣宽度”这四种特征进行两两组合

# Display names of the four features. They are indexed with the same column
# numbers as the feature matrix, and are used in the printed report.
iris_feature = '花萼长度', '花萼宽度', '花瓣长度', '花瓣宽度'

# Select the SimHei font so the Chinese labels can be rendered, and disable
# the Unicode minus sign so negative tick labels use an ASCII hyphen.
mpl.rcParams['font.sans-serif'] = ['simHei']
mpl.rcParams['axes.unicode_minus'] = False

# Column 4 (the species label) is converted by iris_type; the result is
# a float array. encoding='bytes' is spelled out so that the converter
# receives the same input type regardless of the NumPy version's default.
path = 'iris.csv'
data = np.loadtxt(path, dtype=float, delimiter=',',
                  converters={4: iris_type}, encoding='bytes')
# First four columns are the features, the remaining column is the label.
x_prime, y = np.split(data, (4,), axis=1)
# Flatten the (n, 1) label column once, instead of on every loop iteration.
y = y.ravel()

# Every unordered pair of the four feature columns.
feature_pairs = [(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)]
plt.figure(figsize=(10, 9), facecolor='#FFFFFF')
for i, pair in enumerate(feature_pairs):
    # Prepare the data: keep only the two selected feature columns.
    x = x_prime[:, pair]
    # Fit a random forest on this feature pair.
    clf = RandomForestClassifier(n_estimators=200, criterion='entropy', max_depth=3)
    dt_clf = clf.fit(x, y)

    # Plot preparation: an N x M grid spanning the observed range of both
    # selected features.
    N, M = 500, 500
    x1_min, x1_max = x[:, 0].min(), x[:, 0].max()  # range of column 0
    x2_min, x2_max = x[:, 1].min(), x[:, 1].max()  # range of column 1
    t1 = np.linspace(x1_min, x1_max, N)
    t2 = np.linspace(x2_min, x2_max, M)
    x1, x2 = np.meshgrid(t1, t2)  # grid sample points
    x_test = np.stack((x1.ravel(), x2.ravel()), axis=1)  # grid as (N*M, 2) test points

    # Predictions on the training set itself (no held-out data is used).
    y_hat = dt_clf.predict(x)
    c = np.count_nonzero(y_hat == y)  # number of correct predictions

    print('特征：  ', iris_feature[pair[0]], ' + ', iris_feature[pair[1]])
    print('\t预测正确数目：', c)
    print('\t准确率: %.2f%%' % (100 * float(c) / float(len(y))))

特征：花萼长度 + 花萼宽度
预测正确数目： 123
准确率: 82.00%
特征：花萼长度 + 花瓣长度
预测正确数目： 142
准确率: 94.67%
特征：花萼长度 + 花瓣宽度
预测正确数目： 145
准确率: 96.67%
特征：花萼宽度 + 花瓣长度
预测正确数目： 143
准确率: 95.33%
特征：花萼宽度 + 花瓣宽度
预测正确数目： 144
准确率: 96.00%
特征：花瓣长度 + 花瓣宽度
预测正确数目： 145
准确率: 96.67%

学习结果可视化

#显示
        cm_light = mpl.colors.ListedColormap(['#A0FFA0', '#FFA0A0', '#A0A0FF'])
        cm_dark = mpl.colors.ListedColormap(['g', 'r', 'b'])
        y_hat = dt_clf.predict(x_test)  # 预测值
        y_hat = y_hat.reshape(x1.shape)  # 使之与输入的形状相同
        plt.subplot(2, 3, i + 1)
        plt.pcolormesh(x1, x2, y_hat, cmap=cm_light)  # 预测值
        plt.scatter(x[:, 0], x[:, 1], c=y, edgecolors='k', cmap=cm_dark)  # 样本
        plt.xlabel(iris_feature[pair[0]], fontsize=14)
        plt.ylabel(iris_feature[pair[1]], fontsize=14)
        plt.xlim(x1_min, x1_max)
        plt.ylim(x2_min, x2_max)
        plt.grid()
    plt.suptitle(u'随机森林对鸢尾花数据的两特征组合的分类结果', fontsize=18)
    plt.tight_layout(2)
    plt.subplots_adjust(top=0.92)
    plt.show()