labsw/labsw/io.py
François Boulogne e69f87b93e update dimless
2024-12-06 13:31:07 +01:00

105 lines
2.9 KiB
Python

import pandas as pd
import numpy as np
import yaml
import gzip
import os.path
import glob
from scipy.signal import savgol_filter
def load_data_RH_logger(filepath, every=1):
    """
    Load a logger text file into a pandas DataFrame.

    The first line of the file is treated as the header: leading and
    trailing '#' and space characters are stripped, the trailing newline
    is removed and the remainder is split on '|' to obtain the column
    names. The following lines are parsed as space-separated values.
    Files whose name ends with 'gz' are read through gzip.

    Parameters
    ----------
    filepath : str
        Path of the file to read.
    every : int, optional
        Step used to subsample the rows of the returned table.

    Returns
    -------
    pandas.DataFrame
        The data without its first row, renumbered from 0 and then
        sliced with a step of ``every``.
    """
    is_gz = filepath.endswith('gz')

    # Read the header line with the opener matching the file type.
    opener = gzip.open if is_gz else open
    with opener(filepath, 'rt') as stream:
        first_line = stream.readline()

    # Turn the header line into a list of column names.
    columns = first_line.strip('# ').rstrip('\n').split('|')
    # One extra name for the empty column parsed at the end of each row.
    columns.append('X')

    # Parse the data rows, skipping the header line already handled above.
    table = pd.read_csv(
        filepath,
        sep=' ',
        compression='gzip' if is_gz else 'infer',
        names=columns,
        skiprows=1,
    )

    # Remove the placeholder column and the row labelled 0, then renumber
    # the remaining rows from 0.
    table = (
        table
        .drop(columns='X')
        .drop(index=[0])
        .reset_index()
        .drop(columns='index')
    )

    # Keep one row out of `every`.
    return table.iloc[::every]
def process_data_RH_logger(filepath, every, diff_period=1_000, SG_window=10_000):
    """
    Add mass-variation and time-derivative columns to a log and save it.

    The file is loaded at full resolution with ``load_data_RH_logger``.
    The columns 'm', 'M', 'dMdt_SG', 'dMdt_diff', 'dmdt_SG' and
    'dmdt_diff' are added, and one row out of ``every`` is written to an
    HDF5 file under the key 'data'. The output path is ``filepath``
    without its last extension, followed by '-processed.h5'.

    Parameters
    ----------
    filepath : str
        Path of the log file to process.
    every : int
        Step used to subsample the rows before saving.
    diff_period : int, optional
        Number of rows separating the two samples of the finite
        difference derivative.
    SG_window : int, optional
        Window length passed to the Savitzky-Golay filter.
    """
    # Derivatives are computed on every sample; the subsampling is only
    # applied right before writing the result.
    df = load_data_RH_logger(filepath, every=1)

    weight = df['weight']
    m0 = weight.iloc[0]
    # NOTE(review): the final value is averaged over the last 300 rows of
    # the 'dm_m' column whereas m0 is read from 'weight' -- confirm that
    # mixing these two columns is intended.
    mf = df['dm_m'].tail(300).mean()

    # Variation relative to the first sample, raw and rescaled by (mf - m0).
    df['m'] = weight - m0
    df['M'] = (weight - m0) / (mf - m0)

    # Mean time step, used as sample spacing by the Savitzky-Golay filter.
    delta = np.mean(df['time'].diff())

    # For each quantity: a first derivative from a Savitzky-Golay filter
    # (polynomial order 1) and one from a finite difference taken over
    # `diff_period` rows.
    targets = (
        ('M', 'dMdt_SG', 'dMdt_diff'),
        ('m', 'dmdt_SG', 'dmdt_diff'),
    )
    for source, sg_name, diff_name in targets:
        df[sg_name] = savgol_filter(
            df[source],
            window_length=SG_window,
            polyorder=1,
            deriv=1,
            delta=delta,
        )
        df[diff_name] = (
            df[source].diff(periods=diff_period)
            / df['time'].diff(periods=diff_period)
        )

    h5path = os.path.splitext(filepath)[0] + '-processed.h5'
    df.iloc[::every].to_hdf(h5path, key='data')
def load_metadata(filepath):
    """
    Read a YAML file and return its parsed content.

    Parameters
    ----------
    filepath : str
        Path of the YAML file.

    Returns
    -------
    object
        Whatever ``yaml.safe_load`` produces for the file content.
    """
    with open(filepath, 'r') as handle:
        return yaml.safe_load(handle)
def get_exp_list(directory):
    """
    Build a summary table of the experiments found in ``directory``.

    Each entry of ``directory`` is visited in sorted path order. Its name
    must contain a '-': the date is the text after the first '-' and
    before the next '_'. The entry must hold at least one '*.dat.gz' file
    and one '*yml' file; the first '*yml' match is loaded as metadata.

    Parameters
    ----------
    directory : str
        Directory whose entries are the experiment folders.

    Returns
    -------
    pandas.DataFrame
        One row per experiment with the columns 'Date', 'Experiment',
        'Sample Type', 'Width', 'Thickness', 'Length', 'Comment' and
        'Path'.
    """
    # First pass: load the metadata of every experiment folder.
    experiments = []
    for exp_dir in sorted(glob.glob(os.path.join(directory, '*'))):
        folder_name = os.path.basename(exp_dir)
        after_dash = folder_name.split('-', maxsplit=1)[1]
        date = after_dash.split('_')[0]

        # The data file name is not used below; the [0] lookup raises an
        # IndexError when the folder holds no '*.dat.gz' file.
        data_filename = glob.glob(os.path.join(exp_dir, '*.dat.gz'))[0]
        meta_filename = glob.glob(os.path.join(exp_dir, '*yml'))[0]

        metadata = load_metadata(meta_filename)
        metadata['date'] = date
        metadata['path'] = exp_dir
        experiments.append(metadata)

    # Second pass: flatten the nested metadata into one record per row.
    rows = [
        {
            'Date': entry['date'],
            'Experiment': entry['exp'],
            'Sample Type': entry['sample']['type'],
            'Width': entry['sample']['width'],
            'Thickness': entry['sample']['thickness'],
            'Length': entry['sample']['length'],
            'Comment': entry['comment'],
            'Path': entry['path'],
        }
        for entry in experiments
    ]
    return pd.DataFrame(rows)