Christoph Hemmer committed on
Commit
fcd5728
·
1 Parent(s): e1b0bd6

preprocessing bug fix

Browse files
dynamix/preprocessing.py CHANGED
@@ -27,8 +27,12 @@ class DataPreprocessor:
27
  # Parameters for inverse transformations
28
  self.box_cox_params_list = None
29
  self.detrending_params_list = None
 
 
 
30
  self.context_mean = None
31
  self.context_std = None
 
32
  self.original_context = None
33
  self.batch_size = None
34
  self.feature_dim = None
@@ -46,6 +50,12 @@ class DataPreprocessor:
46
  """
47
  # Store original context for inverse transformations
48
  self.original_context = context.clone()
 
 
 
 
 
 
49
 
50
  # Apply Box-Cox transformation for each batch
51
  if self.box_cox:
@@ -99,6 +109,10 @@ class DataPreprocessor:
99
  batch_output = output[:, b, :]
100
  batch_output = BoxCoxTransformer.inverse_transform(batch_output, self.box_cox_params_list[b])
101
  output[:, b, :] = batch_output
 
 
 
 
102
 
103
  return output
104
 
@@ -232,7 +246,7 @@ class DataPreprocessor:
232
  # Apply transformations (Box-Cox, detrending)
233
  context = self._apply_transformations(context)
234
 
235
- # Standardize data if requested
236
  context = self._standardize_data(context)
237
 
238
  # Apply embedding to reach model dimension
 
27
  # Parameters for inverse transformations
28
  self.box_cox_params_list = None
29
  self.detrending_params_list = None
30
+ self.transformation_mean = None
31
+ self.transformation_std = None
32
+
33
  self.context_mean = None
34
  self.context_std = None
35
+
36
  self.original_context = None
37
  self.batch_size = None
38
  self.feature_dim = None
 
50
  """
51
  # Store original context for inverse transformations
52
  self.original_context = context.clone()
53
+
54
+ # Before transformations standardize data
55
+ if self.box_cox or self.detrending:
56
+ self.transformation_mean = torch.mean(context, dim=0)
57
+ self.transformation_std = torch.std(context, dim=0)
58
+ context = (context - self.transformation_mean.unsqueeze(0)) / self.transformation_std.unsqueeze(0)
59
 
60
  # Apply Box-Cox transformation for each batch
61
  if self.box_cox:
 
109
  batch_output = output[:, b, :]
110
  batch_output = BoxCoxTransformer.inverse_transform(batch_output, self.box_cox_params_list[b])
111
  output[:, b, :] = batch_output
112
+
113
+ # Apply inverse standardization if transformation was applied
114
+ if self.transformation_mean is not None and self.transformation_std is not None:
115
+ output = output * self.transformation_std.unsqueeze(0) + self.transformation_mean.unsqueeze(0)
116
 
117
  return output
118
 
 
246
  # Apply transformations (Box-Cox, detrending)
247
  context = self._apply_transformations(context)
248
 
249
+ # Standardize data
250
  context = self._standardize_data(context)
251
 
252
  # Apply embedding to reach model dimension
dynamix/preprocessing_utilities.py CHANGED
@@ -6,6 +6,7 @@ import random
6
  from statsmodels.tsa.stattools import acf
7
  from scipy.ndimage import gaussian_filter1d
8
  from scipy import optimize
 
9
 
10
 
11
  class TimeSeriesProcessor:
@@ -434,11 +435,6 @@ class Detrending:
434
  """
435
  # Convert to numpy
436
  data_np, is_torch, device, dtype = TimeSeriesProcessor.to_numpy(data)
437
-
438
- # Apply min max scaling for a more stable trend fit
439
- _min = np.min(data_np)
440
- _max = np.max(data_np)
441
- data_scaled = (data_np - _min) / (_max - _min)
442
 
443
  seq_length, n_dims = data_np.shape
444
  detrended_data = np.zeros_like(data_np)
@@ -446,10 +442,10 @@ class Detrending:
446
 
447
  for dim in range(n_dims):
448
  # Define the objective function for this dimension
449
- objective = lambda params: Detrending.fit_objective(params, data_scaled[:, dim])
450
 
451
  # Initial parameter guess
452
- initial_params = [0.0, 1.0, data_scaled[0,dim]]
453
 
454
  # Bounds for parameters
455
  bounds = [(None, None), (0.0, 3.0), (None, None)]
@@ -467,10 +463,7 @@ class Detrending:
467
  'maxcor': 10
468
  }
469
  )
470
- optimal_params = np.round(result.x, 3)
471
- #Adjust params to min max scale
472
- optimal_params[0] = (_max - _min) * optimal_params[0]
473
- optimal_params[2] = (_max - _min) * optimal_params[2] + _min
474
 
475
  # Calculate trend and detrend the data
476
  t = np.arange(1, seq_length + 1)
 
6
  from statsmodels.tsa.stattools import acf
7
  from scipy.ndimage import gaussian_filter1d
8
  from scipy import optimize
9
+ from scipy.optimize import curve_fit
10
 
11
 
12
  class TimeSeriesProcessor:
 
435
  """
436
  # Convert to numpy
437
  data_np, is_torch, device, dtype = TimeSeriesProcessor.to_numpy(data)
 
 
 
 
 
438
 
439
  seq_length, n_dims = data_np.shape
440
  detrended_data = np.zeros_like(data_np)
 
442
 
443
  for dim in range(n_dims):
444
  # Define the objective function for this dimension
445
+ objective = lambda params: Detrending.fit_objective(params, data_np[:, dim])
446
 
447
  # Initial parameter guess
448
+ initial_params = [0.0, 1.0, data_np[0,dim]]
449
 
450
  # Bounds for parameters
451
  bounds = [(None, None), (0.0, 3.0), (None, None)]
 
463
  'maxcor': 10
464
  }
465
  )
466
+ optimal_params = np.round(result.x, 10)
 
 
 
467
 
468
  # Calculate trend and detrend the data
469
  t = np.arange(1, seq_length + 1)