You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
129 lines
3.7 KiB
129 lines
3.7 KiB
import pandas as pd
|
|
import numpy as np
|
|
import yaml
|
|
import gzip
|
|
|
|
import os.path
|
|
import glob
|
|
|
|
from scipy.signal import savgol_filter
|
|
|
|
|
|
def load_data_RH_logger(filepath, every=1):
    """
    Load a logger data file into a pandas DataFrame.

    The first line of the file is a header of the form
    ``# name1|name2|...``; the remaining lines are space-separated values.
    Files whose name ends with ``gz`` are read as gzip-compressed text.

    Parameters
    ----------
    filepath : str
        Path to the data file (plain text or gzip).
    every : int
        Step used to subsample the rows (``df.iloc[::every]``).

    Returns
    -------
    pandas.DataFrame
        The data with the header names as columns. The first data row is
        discarded and the index is renumbered from 0 before subsampling.
    """
    compressed = filepath.endswith('gz')

    # Grab the header line with the opener matching the file type
    opener = gzip.open if compressed else open
    with opener(filepath, 'rt') as handle:
        first_line = handle.readline()

    # Column names: remove the leading '# ' marker and the newline, then
    # split on '|'
    columns = first_line.strip('# ').rstrip('\n').split('|')
    # Placeholder name for the extra empty column (it is dropped below)
    columns.append('X')

    # Parse the values, skipping the header line
    read_options = {'sep': ' ', 'names': columns, 'skiprows': 1}
    if compressed:
        read_options['compression'] = 'gzip'
    table = pd.read_csv(filepath, **read_options)

    # Remove the placeholder column and the first data row
    table = table.drop(columns='X').drop(np.arange(1))

    # Renumber the rows from 0 and discard the old index
    table = table.reset_index().drop(columns='index')

    return table.iloc[::every]
|
|
|
|
|
|
def process_data_RH_logger(filepath, every,
                           median_window=1,
                           diff_period=1_000,
                           SG_window_short=100,
                           SG_window_long=10_000):
    """
    Process a logger file and store the result next to it as HDF5.

    All rows are loaded, the ``time`` column is dropped and ``duration`` is
    renamed to ``time``. The ``weight`` column is median-filtered, then the
    variation ``m`` (weight minus first weight) and the normalized
    variation ``M`` (``m`` divided by the difference between the mean of
    the last 300 weights and the first weight) are computed, together with
    their time derivatives.

    Parameters
    ----------
    filepath : str
        Path to the data file, passed to ``load_data_RH_logger``.
    every : int
        Step used to subsample the rows just before saving.
    median_window : int
        Window of the centered rolling median applied to ``weight``.
    diff_period : int
        Number of rows separating the two points of the finite difference.
    SG_window_short, SG_window_long : int
        Window lengths of the two Savitzky-Golay derivative filters.

    Returns
    -------
    None
        The table is written under the key ``data`` to the file whose name
        is ``filepath`` without its last extension plus ``-processed.h5``.
    """
    # Every datapoint is needed for the filters; subsampling happens last
    raw = load_data_RH_logger(filepath, every=1)
    df = raw.drop(columns=['time']).rename(columns={'duration': 'time'})

    # Centered rolling median; rows where the window is incomplete are NaN
    # and get removed
    df['weight'] = df['weight'].rolling(window=median_window, center=True).median()
    df = df.dropna(subset=['weight'])

    initial_weight = df['weight'].iloc[0]
    final_weight = df['weight'].tail(300).mean()

    # Absolute and normalized variation
    weight_gain = df['weight'] - initial_weight
    df['m'] = weight_gain
    df['M'] = weight_gain / (final_weight - initial_weight)

    # Derivatives: two Savitzky-Golay estimates and a finite difference,
    # first for M then for m
    time_step = np.mean(df['time'].diff())
    derivative_columns = (
        ('M', 'dMdt_SG_short', 'dMdt_SG_long', 'dMdt_diff'),
        ('m', 'dmdt_SG_short', 'dmdt_SG_long', 'dmdt_diff'),
    )
    for source, short_name, long_name, diff_name in derivative_columns:
        df[short_name] = savgol_filter(df[source],
                                       window_length=SG_window_short,
                                       polyorder=1, deriv=1, delta=time_step)
        df[long_name] = savgol_filter(df[source],
                                      window_length=SG_window_long,
                                      polyorder=1, deriv=1, delta=time_step)
        df[diff_name] = (df[source].diff(periods=diff_period)
                         / df['time'].diff(periods=diff_period))

    # Output file: same name with the last extension replaced
    output_path = os.path.splitext(filepath)[0] + '-processed.h5'

    # Keep one row out of `every` and save
    df.iloc[::every].to_hdf(output_path, key='data')
|
|
|
|
|
|
def load_metadata(filepath):
    """
    Read a YAML metadata file.

    Parameters
    ----------
    filepath : str
        Path to the YAML file.

    Returns
    -------
    object
        Whatever ``yaml.safe_load`` produces for the file content.
    """
    with open(filepath, 'r') as yaml_file:
        return yaml.safe_load(yaml_file)
|
|
|
|
|
|
def get_exp_list(directory):
    """
    List the experiments stored in a directory as a pandas DataFrame.

    Each sub-directory of ``directory`` is one experiment. Its name is
    parsed as ``<regulator>-<date>_<anything>`` and it must contain a
    ``metadata.yml`` file, which is read with ``load_metadata``.
    Entries of ``directory`` that are not directories are ignored.

    Parameters
    ----------
    directory : str
        Directory containing one sub-directory per experiment.

    Returns
    -------
    pandas.DataFrame
        One row per experiment, sorted by path, with the columns
        ``Date``, ``Device``, ``Experiment``, ``Sample Type``, ``Width``,
        ``Thickness``, ``Length``, ``Comment`` and ``Path``. The columns
        are present even when no experiment is found.

    Raises
    ------
    IndexError
        If a sub-directory name contains no ``-``.
    KeyError
        If a metadata file lacks one of the expected entries.
    """
    columns = ['Date', 'Device', 'Experiment', 'Sample Type', 'Width',
               'Thickness', 'Length', 'Comment', 'Path']

    rows = []
    for exp_dir in sorted(glob.glob(os.path.join(directory, '*'))):
        # Stray files (notes, archives, ...) are not experiments
        if not os.path.isdir(exp_dir):
            continue

        # Directory name layout: <regulator>-<date>_<anything>
        name_parts = os.path.basename(exp_dir).split('-', maxsplit=1)
        regulator_name = name_parts[0]
        date = name_parts[1].split('_')[0]

        metadata = load_metadata(os.path.join(exp_dir, 'metadata.yml'))
        sample = metadata['sample']

        rows.append({
            'Date': date,
            'Device': regulator_name,
            'Experiment': metadata['exp'],
            'Sample Type': sample['type'],
            'Width': sample['width'],
            'Thickness': sample['thickness'],
            'Length': sample['length'],
            'Comment': metadata['comment'],
            'Path': exp_dir,
        })

    # Explicit columns keep the layout stable when `rows` is empty
    return pd.DataFrame(rows, columns=columns)
|