Spaces:

serJD
/

RECODE_HF_tripGeneration

Sleeping

App Files Files Community

RECODE_HF_tripGeneration / utils.py

serJD

ini commit

ce3dfc6 almost 2 years ago

raw

history blame contribute delete

3.56 kB


	import time
	from functools import wraps

	def reconstruct_dataframe(alpha_low, alpha_med, alpha_high, original_df):
	    """Return a copy of ``original_df`` with three marker values swapped out.

	    Cells equal to ``0.00191`` become ``alpha_low``, cells equal to ``0.0038``
	    become ``alpha_med`` and cells equal to ``0.00767`` become ``alpha_high``.
	    All other cells are left as they are; ``original_df`` is not modified.

	    Parameters:
	    - alpha_low: Replacement for cells equal to 0.00191.
	    - alpha_med: Replacement for cells equal to 0.0038.
	    - alpha_high: Replacement for cells equal to 0.00767.
	    - original_df: DataFrame containing the marker values.

	    Returns:
	    - A new DataFrame with the substitutions applied.

	    Raises:
	    - ValueError: if any of the three marker values does not occur anywhere
	      in ``original_df``.
	    """
	    # Define the mapping from original values to new alpha parameters
	    value_to_alpha = {
	        0.00191: alpha_low,
	        0.00767: alpha_high,
	        0.0038: alpha_med,
	    }

	    # Locate every marker on the untouched input first. Replacing one value
	    # at a time on the evolving frame would remap a cell twice whenever a new
	    # alpha happens to equal a marker that is processed later.
	    masks = {
	        original_value: original_df == original_value
	        for original_value in value_to_alpha
	    }

	    # Check if each value is present at least once in the DataFrame
	    for original_value, mask in masks.items():
	        if not mask.any().any():
	            raise ValueError(f"Value {original_value} not found in the input DataFrame.")

	    # Create a new DataFrame based on the original one and apply the mapping
	    new_df = original_df.copy()
	    for original_value, mask in masks.items():
	        new_df = new_df.mask(mask, value_to_alpha[original_value])

	    return new_df

	def preprocess_dataFrame(df, headerRow_idx=0, numRowsStart_idx = None, numRowsEnd_idx=None, numColsStart_idx=None, numColsEnd_idx=None, rowNames_idx=None):
	    """Promote a header row / row-name column and slice out the data region.

	    Parameters:
	    - df: Raw DataFrame; it is left untouched.
	    - headerRow_idx: Positional index of the row used as column labels.
	    - numRowsStart_idx, numRowsEnd_idx: Positional row bounds of the slice
	      (``None`` means open-ended).
	    - numColsStart_idx, numColsEnd_idx: Positional column bounds of the slice
	      (``None`` means open-ended).
	    - rowNames_idx: Positional index of the column used as row labels, or
	      ``None`` to keep the existing index.

	    Returns:
	    - The relabelled, sliced DataFrame.
	    """
	    # Work on a copy: assigning .columns / .index below would otherwise
	    # relabel the caller's DataFrame as a side effect.
	    frame = df.copy()
	    frame.columns = frame.iloc[headerRow_idx]  # Set the header
	    if rowNames_idx is not None:
	        frame.index = frame.iloc[:, rowNames_idx]  # Set the row names
	    # Slice the dataset to numerical data
	    return frame.iloc[numRowsStart_idx:numRowsEnd_idx, numColsStart_idx:numColsEnd_idx]




	def timeit(f):
	    """Decorate ``f`` so that each call prints how long it took.

	    The wrapper forwards every positional and keyword argument to ``f``
	    unchanged and returns whatever ``f`` returns.

	    Parameters:
	    - f: Callable to time.

	    Returns:
	    - The wrapping function (metadata of ``f`` preserved via ``wraps``).
	    """
	    @wraps(f)
	    def timed(*args, **kw):
	        ts = time.time()
	        result = f(*args, **kw)
	        te = time.time()
	        # The report carries only the function name and the duration, so the
	        # format string must have exactly two placeholders.
	        print('func:%r took: %2.4f sec' % (f.__name__, te - ts))
	        return result
	    return timed





	def timing_decorator(func):
	    """Decorate ``func`` so each call prints its duration and finish time.

	    The wrapper forwards every positional and keyword argument to ``func``
	    unchanged and returns whatever ``func`` returns.

	    Parameters:
	    - func: Callable to time.

	    Returns:
	    - The wrapping function (metadata of ``func`` preserved via ``wraps``).
	    """
	    @wraps(func)
	    def wrapper(*args, **kwargs):
	        start_time = time.time()
	        result = func(*args, **kwargs)
	        end_time = time.time()

	        duration = end_time - start_time
	        # end_time is an epoch timestamp, so it doubles as the "finished at" time.
	        timestamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))

	        print(f"{func.__name__} took {duration:.4f} seconds. Finished at {timestamp}")
	        return result

	    return wrapper


	# Function to compare two dataframes after converting and rounding
	def compare_dataframes(df1, df2, decimals=8):
	    """Compare two DataFrames after casting to float and rounding.

	    Prints the comparison result, both shapes, and a small preview of each
	    frame (first two rows of the first five columns).

	    Parameters:
	    - df1: DataFrame reported as the "original" output.
	    - df2: DataFrame reported as the "Numba" / new-method output.
	    - decimals: Number of decimal places kept before comparing.

	    Returns:
	    - True when the rounded frames are equal according to
	      ``DataFrame.equals``, otherwise False.
	    """
	    # Function to convert DataFrame columns to float and then round
	    def convert_and_round_dataframe(df, decimals):
	        return df.astype(float).round(decimals)

	    rounded_df1 = convert_and_round_dataframe(df1, decimals)
	    rounded_df2 = convert_and_round_dataframe(df2, decimals)

	    are_equal = rounded_df1.equals(rounded_df2)

	    print("Both methods are equal:", are_equal)

	    print("Numba shape:", df2.shape)
	    print("Original shape:", df1.shape)

	    # The banners promise the first 5 columns, so slice on the column axis;
	    # a bare iloc[0:5] would slice rows instead.
	    print("======== ORIGINAL OUTPUT (first item in output list, head() for the first 5 columns)")
	    print(df1.iloc[:, 0:5].head(2))

	    print("======== New method OUTPUT (first item in output list, head() for the first 5 columns)")
	    print(df2.iloc[:, 0:5].head(2))

	    # Hand the verdict back so callers can act on it instead of parsing stdout.
	    return are_equal


	def align_dataframes(df1, df2, key):
	    """
	    Restrict two dataframes to the identifiers they have in common.

	    Identifiers are taken from the index of ``df1`` and from the ``key``
	    column of ``df2``.

	    Parameters:
	    - df1: First dataframe, identified by its index.
	    - df2: Second dataframe, identified by the values in column ``key``.
	    - key: Column name in ``df2`` holding the identifiers.

	    Returns:
	    - df1_aligned, df2_aligned: ``df1`` limited to the shared identifiers, and
	      the matching rows of ``df2`` re-indexed by ``key`` (the column is kept).
	    """
	    key_values = df2[key]
	    shared_ids = df1.index.intersection(key_values)
	    # Keep only the df2 rows whose identifier also appears in df1's index.
	    matching_rows = df2[key_values.isin(shared_ids)]
	    return df1.loc[shared_ids], matching_rows.set_index(key, drop=False)