"""Utilities for loading, processing and cataloguing RH-logger experiment data."""
import pandas as pd
import numpy as np
import yaml
import gzip
import os.path
import glob
from scipy.signal import savgol_filter
def load_data_RH_logger(filepath, every=1):
    """Load a space-separated RH-logger data file into a DataFrame.

    Parameters
    ----------
    filepath : str
        Path to the data file. Gzip-compressed files (name ending in
        ``gz``) are handled transparently.
    every : int, optional
        Keep only every ``every``-th row of the result (default 1,
        i.e. keep all rows).

    Returns
    -------
    pandas.DataFrame
        Parsed data with columns taken from the first line of the file
        and the first data row dropped.
    """
    # The first line holds the column names: "# name1|name2|...".
    opener = gzip.open if filepath.endswith('gz') else open
    with opener(filepath, 'rt') as f:
        header = f.readline()
    header = header.strip('# ').rstrip('\n').split('|')
    # Data lines carry one extra (empty) field, so pad the names with a
    # throw-away column that is dropped right after parsing.
    header.append('X')
    # pandas infers gzip compression from the '.gz' extension.
    df = pd.read_csv(filepath, sep=' ', names=header, skiprows=1)
    df = df.drop(columns='X')
    # Drop the first data row, then renumber the index from 0.
    df = df.drop(index=0)
    df = df.reset_index(drop=True)
    return df.iloc[::every]
def process_data_RH_logger(filepath, every,
                           median_window=1,
                           diff_period=1_000,
                           SG_window_short=100,
                           SG_window_long=10_000):
    """Filter and differentiate an RH-logger file, saving the result as HDF5.

    The output is written next to the input file, with the extension
    replaced by ``-processed.h5`` (HDF5 key ``'data'``).

    Parameters
    ----------
    filepath : str
        Path to the raw data file (see ``load_data_RH_logger``).
    every : int
        Subsampling factor applied AFTER filtering, so the filters see
        the full-resolution signal.
    median_window : int, optional
        Window (in samples) of the rolling-median filter on 'weight'.
    diff_period : int, optional
        Lag (in samples) for the finite-difference derivatives.
    SG_window_short, SG_window_long : int, optional
        Window lengths (in samples) for the short/long Savitzky-Golay
        derivative estimates.
    """
    # Load ALL datapoints; the `every` subsampling is applied at the end.
    df = load_data_RH_logger(filepath, every=1)
    df = df.drop(columns=['time'])
    df = df.rename(columns={'duration': 'time'})
    # Median filter on the raw mass signal; centering the window produces
    # NaNs at both ends, which are removed before further processing.
    df['weight'] = df['weight'].rolling(window=median_window, center=True).median()
    df = df.dropna(subset=['weight'])
    # Initial mass, and final mass averaged over the last 300 samples.
    m0 = df['weight'].iloc[0]
    mf = df['weight'].tail(300).mean()
    # Absolute (m) and normalized (M, 0 -> 1) mass variation.
    df['m'] = df['weight'] - m0
    df['M'] = (df['weight'] - m0) / (mf - m0)
    # Derivatives: mean sample spacing feeds the Savitzky-Golay filters.
    # NOTE(review): assumes a roughly uniform sampling rate — confirm.
    delta = np.mean(df['time'].diff())
    df['dMdt_SG_short'] = savgol_filter(df['M'], window_length=SG_window_short, polyorder=1, deriv=1, delta=delta)
    df['dMdt_SG_long'] = savgol_filter(df['M'], window_length=SG_window_long, polyorder=1, deriv=1, delta=delta)
    df['dMdt_diff'] = df['M'].diff(periods=diff_period) / df['time'].diff(periods=diff_period)
    df['dmdt_SG_short'] = savgol_filter(df['m'], window_length=SG_window_short, polyorder=1, deriv=1, delta=delta)
    df['dmdt_SG_long'] = savgol_filter(df['m'], window_length=SG_window_long, polyorder=1, deriv=1, delta=delta)
    df['dmdt_diff'] = df['m'].diff(periods=diff_period) / df['time'].diff(periods=diff_period)
    # Output path: input path with the extension swapped for '-processed.h5'.
    h5path = os.path.splitext(filepath)[0]
    h5path += '-processed.h5'
    # Apply the subsampling only now, on the filtered signal.
    df = df.iloc[::every]
    df.to_hdf(h5path, key='data')
def load_metadata(filepath):
    """Read a YAML metadata file and return its parsed contents.

    Parameters
    ----------
    filepath : str
        Path to the YAML file.

    Returns
    -------
    object
        Whatever ``yaml.safe_load`` produces (typically a dict).
    """
    with open(filepath, 'r') as stream:
        return yaml.safe_load(stream)
def get_exp_list(directory):
    """Scan a directory of experiment folders and summarize them.

    Each sub-directory of `directory` is expected to be named
    ``<regulator>-<date>_...`` and to contain a ``metadata.yml`` file
    with at least the keys ``exp``, ``comment`` and a ``sample`` mapping
    (``type``, ``width``, ``thickness``, ``length``).

    Parameters
    ----------
    directory : str
        Root directory containing one sub-directory per experiment.

    Returns
    -------
    pandas.DataFrame
        One row per experiment with columns Date, Device, Experiment,
        Sample Type, Width, Thickness, Length, Comment and Path.
    """
    rows = []
    for exp_dir in sorted(glob.glob(os.path.join(directory, '*'))):
        dirname = os.path.split(exp_dir)[1]
        # Directory name encodes '<regulator>-<date>_<rest>'.
        regulator_name = dirname.split('-')[0]
        date = dirname.split('-', maxsplit=1)[1].split('_')[0]
        metadata = load_metadata(os.path.join(exp_dir, 'metadata.yml'))
        rows.append({
            'Date': date,
            'Device': regulator_name,
            'Experiment': metadata['exp'],
            'Sample Type': metadata['sample']['type'],
            'Width': metadata['sample']['width'],
            'Thickness': metadata['sample']['thickness'],
            'Length': metadata['sample']['length'],
            'Comment': metadata['comment'],
            'Path': exp_dir,
        })
    return pd.DataFrame(rows)