|
1 | 1 | """Learn to estimate functions from examples. (Chapters 18-20)"""
|
2 | 2 |
|
3 | 3 | from utils import *
|
4 |
| -import random |
| 4 | +import heapq, random |
5 | 5 |
|
6 | 6 | #______________________________________________________________________________
|
7 | 7 |
|
@@ -202,26 +202,11 @@ def N(targetval, attr, attrval):
|
202 | 202 |
|
def NearestNeighborLearner(dataset, k=1):
    """k-NearestNeighbor: the k nearest neighbors vote.

    Returns a function predict(example) that looks up the k examples in
    dataset.examples closest to example, as measured by
    dataset.distance(e, example), and returns the winner of a vote (via
    mode) over their dataset.target fields.
    """
    def predict(example):
        "Find the k closest examples, and have them vote for the best."
        # Rank on distance alone (key=) rather than on (distance, example)
        # tuples: with tuples, two examples at the same distance would be
        # compared to each other, which makes the tie-break depend on the
        # example contents and fails outright when examples are not
        # orderable.  With key=, ties keep their order in dataset.examples.
        nearest = heapq.nsmallest(
            k, dataset.examples,
            key=lambda e: dataset.distance(e, example))
        return mode([e[dataset.target] for e in nearest])
    return predict
|
226 | 211 |
|
227 | 212 | #______________________________________________________________________________
|
|
0 commit comments