Spaces:
Sleeping
Sleeping
fix: boolean selection masks in pandas eval
Browse files
- pysr/export_numpy.py +10 -2
- pysr/sr.py +1 -1
- pysr/test/test.py +3 -3
pysr/export_numpy.py
CHANGED
|
@@ -1,10 +1,12 @@
|
|
| 1 |
"""Code for exporting discovered expressions to numpy"""
|
| 2 |
|
| 3 |
import warnings
|
|
|
|
| 4 |
|
| 5 |
import numpy as np
|
| 6 |
import pandas as pd
|
| 7 |
-
from
|
|
|
|
| 8 |
|
| 9 |
|
| 10 |
def sympy2numpy(eqn, sympy_symbols, *, selection=None):
|
|
@@ -14,6 +16,10 @@ def sympy2numpy(eqn, sympy_symbols, *, selection=None):
|
|
| 14 |
class CallableEquation:
|
| 15 |
"""Simple wrapper for numpy lambda functions built with sympy"""
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
def __init__(self, eqn, sympy_symbols, selection=None):
|
| 18 |
self._sympy = eqn
|
| 19 |
self._sympy_symbols = sympy_symbols
|
|
@@ -29,8 +35,9 @@ class CallableEquation:
|
|
| 29 |
return self._lambda(
|
| 30 |
**{k: X[k].values for k in map(str, self._sympy_symbols)}
|
| 31 |
) * np.ones(expected_shape)
|
|
|
|
| 32 |
if self._selection is not None:
|
| 33 |
-
if X.shape[1] != len(self._selection):
|
| 34 |
warnings.warn(
|
| 35 |
"`X` should be of shape (n_samples, len(self._selection)). "
|
| 36 |
"Automatically filtering `X` to selection. "
|
|
@@ -38,6 +45,7 @@ class CallableEquation:
|
|
| 38 |
"this may lead to incorrect predictions and other errors."
|
| 39 |
)
|
| 40 |
X = X[:, self._selection]
|
|
|
|
| 41 |
return self._lambda(*X.T) * np.ones(expected_shape)
|
| 42 |
|
| 43 |
@property
|
|
|
|
| 1 |
"""Code for exporting discovered expressions to numpy"""
|
| 2 |
|
| 3 |
import warnings
|
| 4 |
+
from typing import List, Union
|
| 5 |
|
| 6 |
import numpy as np
|
| 7 |
import pandas as pd
|
| 8 |
+
from numpy.typing import NDArray
|
| 9 |
+
from sympy import Expr, Symbol, lambdify
|
| 10 |
|
| 11 |
|
| 12 |
def sympy2numpy(eqn, sympy_symbols, *, selection=None):
|
|
|
|
| 16 |
class CallableEquation:
|
| 17 |
"""Simple wrapper for numpy lambda functions built with sympy"""
|
| 18 |
|
| 19 |
+
_sympy: Expr
|
| 20 |
+
_sympy_symbols: List[Symbol]
|
| 21 |
+
_selection: Union[NDArray[np.bool_], None]
|
| 22 |
+
|
| 23 |
def __init__(self, eqn, sympy_symbols, selection=None):
|
| 24 |
self._sympy = eqn
|
| 25 |
self._sympy_symbols = sympy_symbols
|
|
|
|
| 35 |
return self._lambda(
|
| 36 |
**{k: X[k].values for k in map(str, self._sympy_symbols)}
|
| 37 |
) * np.ones(expected_shape)
|
| 38 |
+
|
| 39 |
if self._selection is not None:
|
| 40 |
+
if X.shape[1] != self._selection.sum():
|
| 41 |
warnings.warn(
|
| 42 |
"`X` should be of shape (n_samples, len(self._selection)). "
|
| 43 |
"Automatically filtering `X` to selection. "
|
|
|
|
| 45 |
"this may lead to incorrect predictions and other errors."
|
| 46 |
)
|
| 47 |
X = X[:, self._selection]
|
| 48 |
+
|
| 49 |
return self._lambda(*X.T) * np.ones(expected_shape)
|
| 50 |
|
| 51 |
@property
|
pysr/sr.py
CHANGED
|
@@ -2056,7 +2056,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 2056 |
if self.selection_mask_ is not None:
|
| 2057 |
# RangeIndex enforces column order allowing columns to
|
| 2058 |
# be correctly filtered with self.selection_mask_
|
| 2059 |
-
X = X.
|
| 2060 |
X.columns = self.feature_names_in_
|
| 2061 |
# Without feature information, CallableEquation/lambda_format equations
|
| 2062 |
# require that the column order of X matches that of the X used during
|
|
|
|
| 2056 |
if self.selection_mask_ is not None:
|
| 2057 |
# RangeIndex enforces column order allowing columns to
|
| 2058 |
# be correctly filtered with self.selection_mask_
|
| 2059 |
+
X = X[X.columns[self.selection_mask_]]
|
| 2060 |
X.columns = self.feature_names_in_
|
| 2061 |
# Without feature information, CallableEquation/lambda_format equations
|
| 2062 |
# require that the column order of X matches that of the X used during
|
pysr/test/test.py
CHANGED
|
@@ -526,7 +526,7 @@ class TestFeatureSelection(unittest.TestCase):
|
|
| 526 |
X = self.rstate.randn(20000, 5)
|
| 527 |
y = X[:, 2] ** 2 + X[:, 3] ** 2
|
| 528 |
selected = run_feature_selection(X, y, select_k_features=2)
|
| 529 |
-
|
| 530 |
|
| 531 |
def test_feature_selection_handler(self):
|
| 532 |
X = self.rstate.randn(20000, 5)
|
|
@@ -538,8 +538,8 @@ class TestFeatureSelection(unittest.TestCase):
|
|
| 538 |
variable_names=var_names,
|
| 539 |
y=y,
|
| 540 |
)
|
| 541 |
-
|
| 542 |
-
selected_var_names = [var_names[i] for i in selection]
|
| 543 |
self.assertEqual(set(selected_var_names), set("x2 x3".split(" ")))
|
| 544 |
np.testing.assert_array_equal(
|
| 545 |
np.sort(selected_X, axis=1), np.sort(X[:, [2, 3]], axis=1)
|
|
|
|
| 526 |
X = self.rstate.randn(20000, 5)
|
| 527 |
y = X[:, 2] ** 2 + X[:, 3] ** 2
|
| 528 |
selected = run_feature_selection(X, y, select_k_features=2)
|
| 529 |
+
np.testing.assert_array_equal(selected, [False, False, True, True, False])
|
| 530 |
|
| 531 |
def test_feature_selection_handler(self):
|
| 532 |
X = self.rstate.randn(20000, 5)
|
|
|
|
| 538 |
variable_names=var_names,
|
| 539 |
y=y,
|
| 540 |
)
|
| 541 |
+
np.testing.assert_array_equal(selection, [False, False, True, True, False])
|
| 542 |
+
selected_var_names = [var_names[i] for i in range(5) if selection[i]]
|
| 543 |
self.assertEqual(set(selected_var_names), set("x2 x3".split(" ")))
|
| 544 |
np.testing.assert_array_equal(
|
| 545 |
np.sort(selected_X, axis=1), np.sort(X[:, [2, 3]], axis=1)
|