Skip to content

Commit 3cc6f52

Browse files
committed
Change the way polynomials and sinusoids are generated.
1 parent 9773c98 commit 3cc6f52

14 files changed

+1899
-1597
lines changed

homemade/linear_regression/linear_regression.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,30 +6,34 @@
66

77

88
class LinearRegression:
9+
# pylint: disable=too-many-instance-attributes
910
"""Linear Regression Class"""
1011

11-
def __init__(self, data, labels, polynomial_degree=0, sinusoid_degree=0):
12+
def __init__(self, data, labels, polynomial_degree=0, sinusoid_degree=0, normalize_data=True):
13+
# pylint: disable=too-many-arguments
1214
"""Linear regression constructor.
1315
1416
:param data: training set.
1517
:param labels: training set outputs (correct values).
1618
:param polynomial_degree: degree of additional polynomial features.
1719
:param sinusoid_degree: multipliers for sinusoidal features.
20+
:param normalize_data: flag that indicates that features should be normalized.
1821
"""
1922

2023
# Normalize features and add ones column.
2124
(
2225
data_processed,
2326
features_mean,
2427
features_deviation
25-
) = prepare_for_training(data, polynomial_degree, sinusoid_degree)
28+
) = prepare_for_training(data, polynomial_degree, sinusoid_degree, normalize_data)
2629

2730
self.data = data_processed
2831
self.labels = labels
2932
self.features_mean = features_mean
3033
self.features_deviation = features_deviation
3134
self.polynomial_degree = polynomial_degree
3235
self.sinusoid_degree = sinusoid_degree
36+
self.normalize_data = normalize_data
3337

3438
# Initialize model parameters.
3539
num_features = self.data.shape[1]
@@ -113,7 +117,8 @@ def get_cost(self, data, labels, lambda_param):
113117
data_processed = prepare_for_training(
114118
data,
115119
self.polynomial_degree,
116-
self.sinusoid_degree
120+
self.sinusoid_degree,
121+
self.normalize_data,
117122
)[0]
118123

119124
return self.cost_function(data_processed, labels, lambda_param)
@@ -155,7 +160,8 @@ def predict(self, data):
155160
data_processed = prepare_for_training(
156161
data,
157162
self.polynomial_degree,
158-
self.sinusoid_degree
163+
self.sinusoid_degree,
164+
self.normalize_data,
159165
)[0]
160166

161167
# Do predictions using model hypothesis.

homemade/logistic_regression/logistic_regression.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,11 @@
77

88

99
class LogisticRegression:
10-
# pylint: disable=R0902
10+
# pylint: disable=too-many-instance-attributes
1111
"""Logistic Regression Class"""
1212

1313
def __init__(self, data, labels, polynomial_degree=0, sinusoid_degree=0, normalize_data=False):
14-
# pylint: disable=R0913
14+
# pylint: disable=too-many-arguments
1515
"""Logistic regression constructor.
1616
1717
:param data: training set.

homemade/neural_network/multilayer_perceptron.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
class MultilayerPerceptron:
99
"""Multilayer Perceptron Class"""
1010

11-
# pylint: disable=R0913
11+
# pylint: disable=too-many-arguments
1212
def __init__(self, data, labels, layers, epsilon, normalize_data=False):
1313
"""Multilayer perceptron constructor.
1414
@@ -72,7 +72,7 @@ def predict(self, data):
7272
def gradient_descent(
7373
data, labels, unrolled_theta, layers, regularization_param, max_iteration, alpha
7474
):
75-
# pylint: disable=R0913
75+
# pylint: disable=too-many-arguments
7676
"""Gradient descent function.
7777
7878
Iteratively optimizes theta model parameters.

homemade/utils/features/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
"""Dataset Features Related Utils"""
22

33
from .normalize import normalize
4-
from .add_polynomials import add_polynomials
5-
from .add_sinusoids import add_sinusoids
4+
from .generate_polynomials import generate_polynomials
5+
from .generate_sinusoids import generate_sinusoids
66
from .prepare_for_training import prepare_for_training

homemade/utils/features/add_polynomials.py

Lines changed: 0 additions & 24 deletions
This file was deleted.
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
"""Add polynomial features to the features set"""
2+
3+
import numpy as np
4+
from .normalize import normalize
5+
6+
def generate_polynomials(dataset, polynomial_degree, normalize_data=False):
    """Extends data set with polynomial features of certain degree.

    Returns a new feature array with more features, comprising of
    x1, x2, x1^2, x2^2, x1*x2, x1*x2^2, etc.

    :param dataset: dataset that we want to generate polynomials for.
    :param polynomial_degree: the max power of new features.
    :param normalize_data: flag that indicates whether polynomials need to be normalized or not.
    """

    # Split features on two halves.
    features_split = np.array_split(dataset, 2, axis=1)
    dataset_1 = features_split[0]
    dataset_2 = features_split[1]

    # Extract sets parameters.
    (num_examples_1, num_features_1) = dataset_1.shape
    (num_examples_2, num_features_2) = dataset_2.shape

    # Check if two sets have equal amount of rows.
    if num_examples_1 != num_examples_2:
        raise ValueError('Can not generate polynomials for two sets with different number of rows')

    # Check if at least one set has features.
    if num_features_1 == 0 and num_features_2 == 0:
        raise ValueError('Can not generate polynomials for two sets with no columns')

    # Replace empty set with non-empty one, keeping the feature counts in sync
    # with the replacement (otherwise the stale zero count below would slice
    # both halves down to zero columns and produce an empty result).
    if num_features_1 == 0:
        dataset_1 = dataset_2
        num_features_1 = num_features_2
    elif num_features_2 == 0:
        dataset_2 = dataset_1
        num_features_2 = num_features_1

    # Make sure that sets have the same number of features in order to be able to multiply them.
    # BUGFIX: original compared num_features_1 against num_examples_2 (a row
    # count), which could select the wrong half; take the true minimum instead.
    num_features = min(num_features_1, num_features_2)
    dataset_1 = dataset_1[:, :num_features]
    dataset_2 = dataset_2[:, :num_features]

    # Create polynomials matrix (start with zero columns and grow it).
    polynomials = np.empty((num_examples_1, 0))

    # Generate polynomial features of specified degree:
    # for each total degree i, all cross terms x1^(i-j) * x2^j for j = 0..i.
    for i in range(1, polynomial_degree + 1):
        for j in range(i + 1):
            polynomial_feature = (dataset_1 ** (i - j)) * (dataset_2 ** j)
            polynomials = np.concatenate((polynomials, polynomial_feature), axis=1)

    # Normalize polynomials if needed.
    if normalize_data:
        polynomials = normalize(polynomials)[0]

    # Return generated polynomial features.
    return polynomials

homemade/utils/features/add_sinusoids.py renamed to homemade/utils/features/generate_sinusoids.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import numpy as np
44

55

6-
def add_sinusoids(dataset, sinusoid_degree):
6+
def generate_sinusoids(dataset, sinusoid_degree):
77
"""Extends data set with sinusoid features.
88
99
Returns a new feature array with more features, comprising of
@@ -13,10 +13,14 @@ def add_sinusoids(dataset, sinusoid_degree):
1313
:param sinusoid_degree: multiplier for sinusoid parameter multiplications
1414
"""
1515

16-
sinusoids = np.empty((dataset.shape[0], 0))
16+
# Create sinusoids matrix.
17+
num_examples = dataset.shape[0]
18+
sinusoids = np.empty((num_examples, 0))
1719

18-
for degree in range(1, sinusoid_degree):
20+
# Generate sinusoid features of specified degree.
21+
for degree in range(1, sinusoid_degree + 1):
1922
sinusoid_features = np.sin(degree * dataset)
2023
sinusoids = np.concatenate((sinusoids, sinusoid_features), axis=1)
2124

25+
# Return generated sinusoidal features.
2226
return sinusoids

homemade/utils/features/prepare_for_training.py

Lines changed: 14 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
"""Prepares the dataset for training"""
22

3-
import math
43
import numpy as np
54
from .normalize import normalize
6-
from .add_sinusoids import add_sinusoids
7-
from .add_polynomials import add_polynomials
5+
from .generate_sinusoids import generate_sinusoids
6+
from .generate_polynomials import generate_polynomials
87

98

109
def prepare_for_training(data, polynomial_degree=0, sinusoid_degree=0, normalize_data=True):
@@ -19,29 +18,27 @@ def prepare_for_training(data, polynomial_degree=0, sinusoid_degree=0, normalize
1918
# Normalize data set.
2019
features_mean = 0
2120
features_deviation = 0
21+
data_normalized = data_processed
2222
if normalize_data:
2323
(
24-
data_processed,
24+
data_normalized,
2525
features_mean,
2626
features_deviation
2727
) = normalize(data_processed)
2828

29+
# Replace processed data with normalized processed data.
30+
# We need the normalized data below when adding polynomials and sinusoids.
31+
data_processed = data_normalized
32+
2933
# Add sinusoidal features to the dataset.
30-
if sinusoid_degree:
31-
data_processed = add_sinusoids(data_processed, sinusoid_degree)
34+
if sinusoid_degree > 0:
35+
sinusoids = generate_sinusoids(data_normalized, sinusoid_degree)
36+
data_processed = np.concatenate((data_processed, sinusoids), axis=1)
3237

3338
# Add polynomial features to data set.
34-
if polynomial_degree >= 2:
35-
current_features_num = data_processed.shape[1]
36-
middle_feature_index = math.floor(current_features_num / 2)
37-
38-
# Split features on halves.
39-
features_split = np.split(data_processed, [middle_feature_index], axis=1)
40-
first_half = features_split[0]
41-
second_half = features_split[1]
42-
43-
# Generate polynomials.
44-
data_processed = add_polynomials(first_half, second_half, polynomial_degree)
39+
if polynomial_degree > 0:
40+
polynomials = generate_polynomials(data_normalized, polynomial_degree, normalize_data)
41+
data_processed = np.concatenate((data_processed, polynomials), axis=1)
4542

4643
# Add a column of ones to X.
4744
data_processed = np.hstack((np.ones((num_examples, 1)), data_processed))

0 commit comments

Comments
 (0)