Spaces: Running

Christoph Hemmer committed · Commit fcd5728 · 1 Parent(s): e1b0bd6

preprocessing bug fix

Browse files
- dynamix/preprocessing.py +15 -1
- dynamix/preprocessing_utilities.py +4 -11
dynamix/preprocessing.py CHANGED

@@ -27,8 +27,12 @@ class DataPreprocessor:
         # Parameters for inverse transformations
         self.box_cox_params_list = None
         self.detrending_params_list = None
+        self.transformation_mean = None
+        self.transformation_std = None
+
         self.context_mean = None
         self.context_std = None
+
         self.original_context = None
         self.batch_size = None
         self.feature_dim = None
@@ -46,6 +50,12 @@ class DataPreprocessor:
         """
         # Store original context for inverse transformations
         self.original_context = context.clone()
+
+        # Before transformations standardize data
+        if self.box_cox or self.detrending:
+            self.transformation_mean = torch.mean(context, dim=0)
+            self.transformation_std = torch.std(context, dim=0)
+            context = (context - self.transformation_mean.unsqueeze(0)) / self.transformation_std.unsqueeze(0)

         # Apply Box-Cox transformation for each batch
         if self.box_cox:
@@ -99,6 +109,10 @@ class DataPreprocessor:
             batch_output = output[:, b, :]
             batch_output = BoxCoxTransformer.inverse_transform(batch_output, self.box_cox_params_list[b])
             output[:, b, :] = batch_output
+
+        # Apply inverse standardization if transformation was applied
+        if self.transformation_mean is not None and self.transformation_std is not None:
+            output = output * self.transformation_std.unsqueeze(0) + self.transformation_mean.unsqueeze(0)

         return output

@@ -232,7 +246,7 @@ class DataPreprocessor:
         # Apply transformations (Box-Cox, detrending)
         context = self._apply_transformations(context)

-        # Standardize data
+        # Standardize data
         context = self._standardize_data(context)

         # Apply embedding to reach model dimension
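For orientation, here is a minimal, self-contained sketch of the round trip this fix establishes: standardize the context over the time dimension, apply the transformation, and later undo both steps in reverse order. The (time, batch, feature) layout is inferred from the dim=0 statistics and the output[:, b, :] indexing in the diff; the transform/inverse_transform pair is only an identity placeholder standing in for the Box-Cox and detrending steps, not the actual DataPreprocessor implementation.

import torch

# Identity placeholder for the Box-Cox / detrending step (illustration only)
def transform(x: torch.Tensor) -> torch.Tensor:
    return x

def inverse_transform(x: torch.Tensor) -> torch.Tensor:
    return x

# context has shape (time, batch, feature), matching the dim=0 statistics in the diff
context = torch.randn(100, 4, 3) * 5.0 + 2.0
original = context.clone()

# Standardize before the transformations (the step added by this commit)
mean = torch.mean(context, dim=0)
std = torch.std(context, dim=0)
standardized = (context - mean.unsqueeze(0)) / std.unsqueeze(0)

transformed = transform(standardized)

# ... downstream processing happens in transformed space ...

# Invert in reverse order: inverse transformation first, then inverse standardization
recovered = inverse_transform(transformed)
recovered = recovered * std.unsqueeze(0) + mean.unsqueeze(0)

print(torch.allclose(recovered, original, atol=1e-4))  # True for the identity placeholder

The key point is symmetry: because standardization now happens before Box-Cox and detrending, its inverse has to be applied after their inverses, which is exactly where the new block in the inverse path sits.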
dynamix/preprocessing_utilities.py CHANGED

@@ -6,6 +6,7 @@ import random
 from statsmodels.tsa.stattools import acf
 from scipy.ndimage import gaussian_filter1d
 from scipy import optimize
+from scipy.optimize import curve_fit


 class TimeSeriesProcessor:
@@ -434,11 +435,6 @@ class Detrending:
         """
         # Convert to numpy
         data_np, is_torch, device, dtype = TimeSeriesProcessor.to_numpy(data)
-
-        # Apply min max scaling for a more stable trend fit
-        _min = np.min(data_np)
-        _max = np.max(data_np)
-        data_scaled = (data_np - _min) / (_max - _min)

         seq_length, n_dims = data_np.shape
         detrended_data = np.zeros_like(data_np)
@@ -446,10 +442,10 @@

         for dim in range(n_dims):
             # Define the objective function for this dimension
-            objective = lambda params: Detrending.fit_objective(params, data_scaled[:, dim])
+            objective = lambda params: Detrending.fit_objective(params, data_np[:, dim])

             # Initial parameter guess
-            initial_params = [0.0, 1.0, data_scaled[0, dim]]
+            initial_params = [0.0, 1.0, data_np[0,dim]]

             # Bounds for parameters
             bounds = [(None, None), (0.0, 3.0), (None, None)]
@@ -467,10 +463,7 @@
                     'maxcor': 10
                 }
             )
-            optimal_params = np.round(result.x,
-            #Adjust params to min max scale
-            optimal_params[0] = (_max - _min) * optimal_params[0]
-            optimal_params[2] = (_max - _min) * optimal_params[2] + _min
+            optimal_params = np.round(result.x, 10)

             # Calculate trend and detrend the data
             t = np.arange(1, seq_length + 1)