Spaces:
Sleeping
Sleeping
| import time | |
| from functools import wraps | |
def reconstruct_dataframe(alpha_low, alpha_med, alpha_high, original_df):
    """Return a copy of ``original_df`` with the placeholder values swapped for new alphas.

    Every cell equal to 0.00191 becomes ``alpha_low``, every cell equal to
    0.0038 becomes ``alpha_med`` and every cell equal to 0.00767 becomes
    ``alpha_high``. All other cells are left as they are, and ``original_df``
    itself is not modified.

    Parameters:
    - alpha_low: Replacement for cells equal to 0.00191.
    - alpha_med: Replacement for cells equal to 0.0038.
    - alpha_high: Replacement for cells equal to 0.00767.
    - original_df: DataFrame containing the placeholder values.

    Returns:
    - A new DataFrame with the replacements applied.

    Raises:
    - ValueError: If any of the three placeholder values does not occur
      anywhere in ``original_df``.
    """
    # Mapping from the placeholder found in the input to its new alpha value.
    value_to_alpha = {
        0.00191: alpha_low,
        0.00767: alpha_high,
        0.0038: alpha_med,
    }

    # Locate every placeholder in the ORIGINAL data before changing anything.
    # Replacing one value at a time on the already-modified frame would let a
    # freshly written alpha that happens to equal another placeholder
    # (e.g. alpha_low == 0.0038) be replaced a second time.
    masks = {}
    for original_value in value_to_alpha:
        mask = original_df == original_value
        if not mask.any().any():
            raise ValueError(f"Value {original_value} not found in the input DataFrame.")
        masks[original_value] = mask

    new_df = original_df.copy()
    for original_value, new_value in value_to_alpha.items():
        new_df = new_df.mask(masks[original_value], new_value)
    return new_df
def preprocess_dataFrame(
    df,
    headerRow_idx=0,
    numRowsStart_idx=None,
    numRowsEnd_idx=None,
    numColsStart_idx=None,
    numColsEnd_idx=None,
    rowNames_idx=None,
):
    """Promote a header row (and optionally a label column) and slice out the data region.

    The caller's ``df`` is not modified; all relabelling happens on a copy.

    Parameters:
    - df: Input DataFrame.
    - headerRow_idx: Positional index of the row whose values become the column labels.
    - numRowsStart_idx, numRowsEnd_idx: Positional row slice bounds of the
      region to keep (``None`` leaves that side open).
    - numColsStart_idx, numColsEnd_idx: Positional column slice bounds of the
      region to keep (``None`` leaves that side open).
    - rowNames_idx: Positional index of the column whose values become the
      row labels, or ``None`` to keep the existing index.

    Returns:
    - The relabelled DataFrame restricted to the requested rows and columns.
    """
    # Work on a copy: assigning .columns / .index on the argument itself would
    # silently relabel the caller's DataFrame as a side effect.
    df = df.copy()
    df.columns = df.iloc[headerRow_idx]  # Set the header
    if rowNames_idx is not None:
        df.index = df.iloc[:, rowNames_idx]  # Set the row names
    # Slice the dataset down to the requested block of rows and columns.
    return df.iloc[numRowsStart_idx:numRowsEnd_idx, numColsStart_idx:numColsEnd_idx]
def timeit(f):
    """Decorator that prints how long each call to ``f`` took.

    The wrapped function's return value is passed through unchanged. After
    each call a line of the form ``func:'name' took: 0.1234 sec`` is printed.

    Parameters:
    - f: The callable to time.

    Returns:
    - A wrapper with the same call signature as ``f``.
    """
    @wraps(f)
    def timed(*args, **kw):
        ts = time.time()
        result = f(*args, **kw)
        te = time.time()
        # Only the name and the elapsed time are reported; the call arguments
        # are left out of the message. The format string must therefore have
        # exactly two placeholders -- the earlier four-placeholder version
        # raised TypeError on every call.
        print('func:%r took: %2.4f sec' % (f.__name__, te - ts))
        return result
    return timed
def timing_decorator(func):
    """Decorator that prints the duration and completion time of each call to ``func``.

    The wrapped function's return value is passed through unchanged. After
    each successful call a line of the form
    ``name took 0.1234 seconds. Finished at YYYY-MM-DD HH:MM:SS`` is printed.

    Parameters:
    - func: The callable to time.

    Returns:
    - A wrapper with the same call signature as ``func``.
    """
    @wraps(func)  # keep func's __name__ / __doc__ on the wrapper
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic, so the measured duration is not affected
        # by system clock adjustments made while func is running.
        start = time.perf_counter()
        result = func(*args, **kwargs)
        duration = time.perf_counter() - start
        # The wall clock is only needed for the human-readable finish time.
        timestamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
        print(f"{func.__name__} took {duration:.4f} seconds. Finished at {timestamp}")
        return result
    return wrapper
# Function to compare two dataframes after converting and rounding
def compare_dataframes(df1, df2, decimals=8):
    """Compare two DataFrames after casting to float and rounding, and print a short report.

    Both frames are converted with ``astype(float)`` and rounded to
    ``decimals`` places before being compared with ``DataFrame.equals``.
    The verdict, both shapes and a small preview of each frame are printed.

    Parameters:
    - df1: First DataFrame (labelled "Original" / "ORIGINAL OUTPUT" in the report).
    - df2: Second DataFrame (labelled "Numba" / "New method OUTPUT" in the report).
    - decimals: Number of decimal places to round to before comparing.

    Returns:
    - True if the rounded frames are equal, otherwise False, so callers can
      act on the verdict instead of parsing the printed text.
    """
    rounded_df1 = df1.astype(float).round(decimals)
    rounded_df2 = df2.astype(float).round(decimals)
    are_equal = rounded_df1.equals(rounded_df2)

    print("Both methods are equal:", are_equal)
    print("Numba shape:", df2.shape)
    print("Original shape:", df1.shape)
    # The labels below announce "the first 5 columns", so the preview slices
    # columns; the previous ``iloc[0:5].head(2)`` sliced rows and was
    # equivalent to a plain ``head(2)`` over every column.
    print("======== ORIGINAL OUTPUT (first item in output list, head() for the first 5 columns)")
    print(df1.iloc[:, 0:5].head(2))
    print("======== New method OUTPUT (first item in output list, head() for the first 5 columns)")
    print(df2.iloc[:, 0:5].head(2))
    return are_equal
def align_dataframes(df1, df2, key):
    """
    Restrict two dataframes to the rows whose keys occur in both of them.

    The keys of ``df1`` are taken from its index; the keys of ``df2`` are
    taken from the column named ``key``.

    Parameters:
    - df1: First dataframe, keyed by its index.
    - df2: Second dataframe, keyed by the ``key`` column.
    - key: Name of the column in ``df2`` holding the keys.

    Returns:
    - df1_aligned, df2_aligned: ``df1`` selected by the shared keys, and the
      matching rows of ``df2`` re-indexed by ``key`` (the column is kept).
    """
    shared_keys = df1.index.intersection(df2[key])
    # Boolean row selector for df2: True where the row's key is shared.
    in_both = df2[key].isin(shared_keys)
    left = df1.loc[shared_keys]
    right = df2.loc[in_both].set_index(key, drop=False)
    return left, right