Merge pull request #2 from hashlag/regressor

add regressor
This commit is contained in:
hashlag
2024-02-04 17:53:46 +03:00
committed by GitHub
4 changed files with 130 additions and 1 deletions

49
demo/regressor_demo.py Normal file
View File

@@ -0,0 +1,49 @@
import matplotlib.pyplot as plt
import neighbours as ns
import numpy as np
import random
import math
# function for generating a synthetic regression problem
def f(x: float) -> float:
    """Return the noiseless target value of the synthetic regression problem.

    The curve is piecewise: a cosine wave up to and including x = 40 and a
    shifted base-2 logarithm beyond it.

    :param x: x coordinate of the sample
    :return: log2(x) - 6 if x > 40, otherwise cos(0.1 * x)
    """
    if x > 40:
        # math.log2 is more accurate than math.log(x, 2), which divides two
        # natural logarithms and can be off by an ulp on exact powers of two
        return math.log2(x) - 6
    return math.cos(x * 0.1)
# train inputs: one single-feature sample near every integer in [1, 100),
# each jittered by uniform noise from [-1, 1]
X = np.array(
    [[base + random.uniform(-1, 1)] for base in np.arange(start=1, stop=100, step=1)]
)

# train targets: the synthetic function at each sample plus a little noise
y = np.array([f(row[0]) + random.uniform(-0.1, 0.1) for row in X])

# dense grid of x coordinates on which the fitted curve is drawn,
# reshaped into a column so that every row is one single-feature sample
x_points = np.arange(start=0, stop=100, step=0.1)
X_demo = x_points[:, np.newaxis]

# set up the regressor (1 feature, 10 trees, up to 7 samples per leaf)
# and hand it the train data
regressor = ns.KNNRegressor(features=1, trees_count=10, rpt_m=7)
regressor.load(X, y)

# predicted y value for every grid sample (euclidean metric,
# gaussian kernel, bandwidth 3)
y_predicted = [
    regressor.predict(sample, ns.distance.euclidean, ns.kernel.gaussian, 3)
    for sample in X_demo
]

# train points as blue dots, prediction curve as a red line
plt.plot(X, y, 'bo')
plt.plot(x_points, y_predicted, 'r')
plt.show()

View File

@@ -1,3 +1,4 @@
from .knn_classifier import *
from .classifier import *
from .regressor import *
from . import distance
from . import kernel

79
neighbours/regressor.py Normal file
View File

@@ -0,0 +1,79 @@
import numpy as np
from .rp_neighbours import *
from .exceptions import *
class KNNRegressor:
    """K-nearest neighbors regressor

    Nadaraya-Watson kNN regressor based on random projection forest.
    Supports different (including custom) smoothing kernels and distance metrics.

    Attributes:
        features: number of features in each sample
        forest: an instance of RPTForest
        targets: an array of target values corresponding to loaded train points
            (None until load() has completed successfully)
    """

    def __init__(self, features, trees_count, rpt_m):
        """Initializes new regressor

        :param features: number of features in each sample
        :param trees_count: number of trees in the forest
        :param rpt_m: maximum number of samples in one leaf of an RP tree
        """
        self.features = features
        self.forest = RPTForest(features, trees_count, rpt_m)
        self.targets = None

    def load(self, points, targets):
        """Loads train data, builds a corresponding forest

        All arguments are validated before any state is changed, so a
        rejected call leaves previously loaded data untouched.

        :param points: two-dimensional np.ndarray of train samples,
            one row per sample with self.features columns
        :param targets: np.ndarray or list of target values, one per row of points
        :raises InvalidType: if points is not an np.ndarray or targets is
            neither an np.ndarray nor a list
        :raises InvalidDimensionError: if points is not two-dimensional, has the
            wrong number of features, or targets does not hold exactly one
            value per point
        """
        if not isinstance(points, np.ndarray):
            raise InvalidType("points should be represented as np.ndarray")

        if not isinstance(targets, (np.ndarray, list)):
            raise InvalidType("targets should be represented as np.ndarray or list")

        if points.ndim != 2:
            raise InvalidDimensionError("points array should be two-dimensional")

        if points.shape[1] != self.features:
            raise InvalidDimensionError(
                "invalid number of features in sample (expected {}, got {})".format(self.features, points.shape[1])
            )

        # predict() indexes targets by point index, so a length mismatch would
        # otherwise only show up later as an IndexError or as ignored targets
        if len(targets) != points.shape[0]:
            raise InvalidDimensionError(
                "invalid number of targets (expected {}, got {})".format(points.shape[0], len(targets))
            )

        self.forest.load(points)
        # store targets last so they always match the points held by the forest
        self.targets = targets

    def predict(self, point: np.ndarray, distance, kernel, h):
        """Predict target value for given point

        :param point: target point as np.ndarray
        :param distance: distance metric function (e.g., from neighbours.distance),
            called as distance(point, neighbour)
        :param kernel: smoothing kernel function (e.g., from neighbours.kernel),
            called with the distance divided by h
        :param h: bandwidth (used as a divisor, so it must be non-zero)
        :return: predicted value or numpy.nan if unable to obtain a prediction
            (no neighbours found or all kernel weights sum to zero)
        """
        nearest_point_indexes = self.forest.get_neighbours(point)

        # Nadaraya-Watson estimator: kernel-weighted mean of neighbour targets
        numerator = 0.0
        denominator = 0.0

        for point_ix in nearest_point_indexes:
            weight = kernel(distance(point, self.forest.get_point(point_ix)) / h)
            numerator += weight * self.targets[point_ix]
            denominator += weight

        # a zero total weight would make the weighted mean undefined
        return np.nan if denominator == 0 else numerator / denominator