You've already forked neighbours
add regressor
This commit is contained in:
49
demo/regressor_demo.py
Normal file
49
demo/regressor_demo.py
Normal file
@@ -0,0 +1,49 @@
|
||||
import matplotlib.pyplot as plt
|
||||
import neighbours as ns
|
||||
|
||||
import numpy as np
|
||||
import random
|
||||
import math
|
||||
|
||||
|
||||
# function for generating a synthetic regression problem
|
||||
def f(x):
    """Synthetic target function used to generate the demo regression problem.

    :param x: a real number (must be positive when it exceeds 40, which always holds)
    :return: ``log2(x) - 6`` for ``x > 40``, otherwise ``cos(0.1 * x)``
    """
    if x > 40:
        # math.log2 is more accurate than math.log(x, 2), which divides two natural logs
        return math.log2(x) - 6
    return math.cos(x * 0.1)
|
||||
|
||||
|
||||
# train x coordinates: integers 1..99, each jittered by uniform noise (one feature per sample)
X = [[base + random.uniform(-1, 1)] for base in np.arange(start=1, stop=100, step=1)]

# matching y coordinates: synthetic function value plus a small uniform noise
y = [f(row[0]) + random.uniform(-0.1, 0.1) for row in X]

# the regressor expects numpy arrays
X = np.array(X)
y = np.array(y)

# dense grid of x coordinates used to draw the fitted curve
x_points = np.arange(start=0, stop=100, step=0.1)
X_demo = np.array(x_points).reshape(-1, 1)

# build the regressor and feed it the train data
regressor = ns.KNNRegressor(features=1, trees_count=10, rpt_m=7)
regressor.load(X, y)

# predicted y value for every sample of the demo grid
y_predicted = [
    regressor.predict(sample, ns.distance.euclidean, ns.kernel.gaussian, 3)
    for sample in X_demo
]

# train points as blue dots
plt.plot(X, y, 'bo')

# fitted curve as a red line
plt.plot(x_points, y_predicted, 'r')

plt.show()
|
||||
@@ -1,3 +1,4 @@
|
||||
from .knn_classifier import *
|
||||
from .classifier import *
|
||||
from .regressor import *
|
||||
from . import distance
|
||||
from . import kernel
|
||||
|
||||
51
neighbours/regressor.py
Normal file
51
neighbours/regressor.py
Normal file
@@ -0,0 +1,51 @@
|
||||
import numpy as np
|
||||
|
||||
from .rp_neighbours import *
|
||||
from .exceptions import *
|
||||
|
||||
|
||||
class KNNRegressor:
    """Kernel-weighted nearest-neighbour regressor backed by an ``RPTForest``.

    Train samples are stored in the forest; ``predict`` combines the targets of the
    neighbours returned by the forest with the Nadaraya-Watson estimator.
    """

    # Seeds the estimator's denominator so the division cannot fail when every
    # kernel weight is zero (or when the forest returns no neighbours).
    _EPSILON = 0.0000001

    def __init__(self, features, trees_count, rpt_m):
        """Creates an empty regressor

        :param features: expected number of features in every sample
        :param trees_count: passed to ``RPTForest`` as its second argument
        :param rpt_m: passed to ``RPTForest`` as its third argument
        """
        self.features = features
        self.forest = RPTForest(features, trees_count, rpt_m)
        self.targets = None

    def load(self, points, targets):
        """Loads train data, builds a corresponding forest

        All arguments are validated before any state is changed, so a rejected
        call leaves previously loaded data untouched.

        :param points: two-dimensional np.ndarray of train samples
        :param targets: an array of target values corresponding to loaded train points
        :raises InvalidType: if points is not an np.ndarray or targets is neither
            an np.ndarray nor a list
        :raises InvalidDimensionError: if points is not two-dimensional, has an
            unexpected number of features, or its length differs from targets
        """
        if not isinstance(points, np.ndarray):
            raise InvalidType("points should be represented as np.ndarray")

        if not isinstance(targets, (np.ndarray, list)):
            raise InvalidType("targets should be represented as np.ndarray or list")

        if points.ndim != 2:
            raise InvalidDimensionError("points array should be two-dimensional")

        if points.shape[1] != self.features:
            raise InvalidDimensionError(
                "invalid number of features in sample (expected {}, got {})".format(self.features, points.shape[1])
            )

        if len(targets) != points.shape[0]:
            raise InvalidDimensionError(
                "number of targets does not match number of points (expected {}, got {})".format(
                    points.shape[0], len(targets)
                )
            )

        # store targets only once everything has been validated, right next to the
        # forest update, so that targets and forest always describe the same data
        self.targets = targets
        self.forest.load(points)

    def predict(self, point: np.ndarray, distance, kernel, h):
        """Estimates the target value for a single sample

        :param point: sample to predict the target for
        :param distance: callable taking two points and returning the distance between them
        :param kernel: callable mapping a distance scaled by ``h`` to a weight
        :param h: bandwidth the distance is divided by before the kernel is applied
        :return: kernel-weighted average of the targets of the neighbours found by the forest
        """
        # Nadaraya-Watson estimator: sum(w_i * y_i) / sum(w_i)
        numerator = 0.0
        denominator = self._EPSILON

        for point_ix in self.forest.get_neighbours(point):
            weight = kernel(distance(point, self.forest.get_point(point_ix)) / h)
            numerator += weight * self.targets[point_ix]
            denominator += weight

        return numerator / denominator
|
||||
Reference in New Issue
Block a user