1 import numpy as np
  2 import matplotlib.pyplot as plt
  3 from .plot_helpers import cm2, cm3, discrete_scatter
  4 
  5 def _call_classifier_chunked(classifier_pred_or_decide, X):
  6 # The chunk_size is used to chunk the large arrays to work with x86
  7 # memory models that are restricted to < 2 GB in memory allocation. The
  8 # chunk_size value used here is based on a measurement with the
  9 # MLPClassifier using the following parameters:
 10 # MLPClassifier(solver='lbfgs', random_state=0,
 11 # hidden_layer_sizes=[1000,1000,1000])
 12 # by reducing the value it is possible to trade in time for memory.
 13 # It is possible to chunk the array as the calculations are independent of
 14 # each other.
 15 # Note: an intermittent version made a distinction between
 16 # 32- and 64 bit architectures avoiding the chunking. Testing revealed
 17 # that even on 64 bit architectures the chunking increases the
 18 # performance by a factor of 3-5, largely due to the avoidance of memory
 19 # swapping.
 20 chunk_size = 10000
 21 
 22 # We use a list to collect all result chunks
 23 Y_result_chunks = []
 24 
 25 # Call the classifier in chunks.
 26 for x_chunk in np.array_split(X, np.arange(chunk_size, X.shape[0],
 27 chunk_size, dtype=np.int32),
 28 axis=0):
 29 Y_result_chunks.append(classifier_pred_or_decide(x_chunk))
 30 
 31 return np.concatenate(Y_result_chunks)
 32 
 33 
 34 def plot_2d_classification(classifier, X, fill=False, ax=None, eps=None,
 35 alpha=1, cm=cm3):
 36 # multiclass
 37 if eps is None:
 38 eps = X.std() / 2.
 39 
 40 if ax is None:
 41 ax = plt.gca()
 42 
 43 x_min, x_max = X[:, 0].min() - eps, X[:, 0].max() + eps
 44 y_min, y_max = X[:, 1].min() - eps, X[:, 1].max() + eps
 45 xx = np.linspace(x_min, x_max, 1000)
 46 yy = np.linspace(y_min, y_max, 1000)
 47 
 48 X1, X2 = np.meshgrid(xx, yy)
 49 X_grid = np.c_[X1.ravel(), X2.ravel()]
 50 decision_values = classifier.predict(X_grid)
 51 ax.imshow(decision_values.reshape(X1.shape), extent=(x_min, x_max,
 52 y_min, y_max),
 53 aspect='auto', origin='lower', alpha=alpha, cmap=cm)
 54 ax.set_xlim(x_min, x_max)
 55 ax.set_ylim(y_min, y_max)
 56 ax.set_xticks(())
 57 ax.set_yticks(())
 58 
 59 
 60 def plot_2d_scores(classifier, X, ax=None, eps=None, alpha=1, cm="viridis",
 61 function=None):
 62 # binary with fill
 63 if eps is None:
 64 eps = X.std() / 2.
 65 
 66 if ax is None:
 67 ax = plt.gca()
 68 
 69 x_min, x_max = X[:, 0].min() - eps, X[:, 0].max() + eps
 70 y_min, y_max = X[:, 1].min() - eps, X[:, 1].max() + eps
 71 xx = np.linspace(x_min, x_max, 100)
 72 yy = np.linspace(y_min, y_max, 100)
 73 
 74 X1, X2 = np.meshgrid(xx, yy)
 75 X_grid = np.c_[X1.ravel(), X2.ravel()]
 76 if function is None:
 77 function = getattr(classifier, "decision_function",
 78 getattr(classifier, "predict_proba"))
 79 else:
 80 function = getattr(classifier, function)
 81 decision_values = function(X_grid)
 82 if decision_values.ndim > 1 and decision_values.shape[1] > 1:
 83 # predict_proba
 84 decision_values = decision_values[:, 1]
 85 grr = ax.imshow(decision_values.reshape(X1.shape),
 86 extent=(x_min, x_max, y_min, y_max), aspect='auto',
 87 origin='lower', alpha=alpha, cmap=cm)
 88 
 89 ax.set_xlim(x_min, x_max)
 90 ax.set_ylim(y_min, y_max)
 91 ax.set_xticks(())
 92 ax.set_yticks(())
 93 return grr
 94 
 95 
 96 def plot_2d_separator(classifier, X, fill=False, ax=None, eps=None, alpha=1,
 97 cm=cm2, linewidth=None, threshold=None,
 98 linestyle="solid"):
 99 # binary?
100 if eps is None:
101 eps = X.std() / 2.
102 
103 if ax is None:
104 ax = plt.gca()
105 
106 x_min, x_max = X[:, 0].min() - eps, X[:, 0].max() + eps
107 y_min, y_max = X[:, 1].min() - eps, X[:, 1].max() + eps
108 xx = np.linspace(x_min, x_max, 1000)
109 yy = np.linspace(y_min, y_max, 1000)
110 
111 X1, X2 = np.meshgrid(xx, yy)
112 X_grid = np.c_[X1.ravel(), X2.ravel()]
113 if hasattr(classifier, "decision_function"):
114 decision_values = _call_classifier_chunked(classifier.decision_function,
115 X_grid)
116 levels = [0] if threshold is None else [threshold]
117 fill_levels = [decision_values.min()] + levels + [
118 decision_values.max()]
119 else:
120 # no decision_function
121 decision_values = _call_classifier_chunked(classifier.predict_proba,
122 X_grid)[:, 1]
123 levels = [.5] if threshold is None else [threshold]
124 fill_levels = [0] + levels + [1]
125 if fill:
126 ax.contourf(X1, X2, decision_values.reshape(X1.shape),
127 levels=fill_levels, alpha=alpha, cmap=cm)
128 else:
129 ax.contour(X1, X2, decision_values.reshape(X1.shape), levels=levels,
130 colors="black", alpha=alpha, linewidths=linewidth,
131 linestyles=linestyle, zorder=5)
132 
133 ax.set_xlim(x_min, x_max)
134 ax.set_ylim(y_min, y_max)
135 ax.set_xticks(())
136 ax.set_yticks(())
137 
138 
if __name__ == '__main__':
    # Demo: fit a logistic regression on two blobs, draw the filled
    # separator and scatter the samples on top of it.
    from sklearn.datasets import make_blobs
    from sklearn.linear_model import LogisticRegression

    X, y = make_blobs(centers=2, random_state=42)

    clf = LogisticRegression(solver='lbfgs')
    clf.fit(X, y)

    plot_2d_separator(clf, X, fill=True)
    discrete_scatter(X[:, 0], X[:, 1], y)
    plt.show()
Logo

开放原子开发者工作坊旨在鼓励更多人参与开源活动,与志同道合的开发者们相互交流开发经验、分享开发心得、获取前沿技术趋势。工作坊有多种形式的开发者活动,如meetup、训练营等,主打技术交流,干货满满,真诚地邀请各位开发者共同参与!

更多推荐