Source code for mftoolkit.mfsources
# -*- coding: utf-8 -*-
"""
Created on Fri Jun 27 10:14:32 2025
@author: Nahuel Mendez & Sebastian Jaroszewicz
"""
import numpy as np
import multiprocessing
import warnings, logging
logger = logging.getLogger(__name__)
[docs]
def shuffle_surrogate(original_series: np.ndarray, num_shuffles: int = 100) -> list[np.ndarray]:
""" Generate surrogate time series by randomly shuffling the original series.
This method creates surrogate series that have the exact same amplitude
distribution (histogram) as the original series. However, it destroys all
temporal structures, including both linear and non-linear correlations,
by randomly reordering the data points.
Parameters
----------
original_series : array_like
The 1D input time series to create surrogates from.
num_shuffles : int, optional
The number of shuffled surrogate series to generate.
Defaults to 100.
Returns
-------
np.ndarray or list[np.ndarray]
- If num_shuffles is 1 (default), returns a single NumPy array.
- If num_shuffles > 1, returns a list of NumPy arrays.
"""
# Ensure the input is a NumPy array for consistent handling
series_data = np.asarray(original_series)
# Use a list comprehension for a concise and efficient loop
shuffle = [np.random.permutation(series_data) for _ in range(num_shuffles)]
return shuffle[0] if num_shuffles == 1 else shuffle
def _iaaft_worker(args):
"""
Worker function. Generate a surrogate time series using the IAAFT algorithm.
This method creates a surrogate series that has the same power spectrum
(and thus the same linear autocorrelation) and the same amplitude
distribution (histogram) as the original series. It is used to create
a null model for hypothesis testing, where any nonlinear structure
present in the original data is destroyed.
"""
original_series, max_iter, tol, seed = args
# Using a seed for reproducibility
if seed is not None:
np.random.seed(seed)
n = len(original_series)
original_fft = np.fft.rfft(original_series)
original_amplitudes = np.abs(original_fft)
sorted_original = np.sort(original_series)
surrogate = np.random.permutation(original_series)
prev_spec_err = np.inf
for i in range(max_iter):
surrogate_fft = np.fft.rfft(surrogate)
surrogate_phases = np.angle(surrogate_fft)
new_fft = original_amplitudes * np.exp(1j * surrogate_phases)
candidate = np.fft.irfft(new_fft, n=n)
ranks = candidate.argsort().argsort()
surrogate = sorted_original[ranks]
current_fft = np.fft.rfft(surrogate)
spec_err = np.mean((original_amplitudes - np.abs(current_fft))**2)
if prev_spec_err > 0 and abs(prev_spec_err - spec_err) / prev_spec_err < tol:
break
prev_spec_err = spec_err
if i == max_iter - 1:
# We use warnings
warnings.warn(f"IAAFT surrogate for seed {seed} reached max_iter ({max_iter}) without explicit convergence.", RuntimeWarning)
return surrogate
[docs]
def iaaft_surrogate(original_series, num_surrogates=1, max_iter=1000, tol=1e-8, n_jobs=-1):
"""Generates one or more surrogate time series using the IAAFT algorithm.
This method creates surrogate series that preserve the power spectrum
(autocorrelation) and the amplitude distribution of the original series.
The generation can be run in parallel on multiple CPU cores.
Parameters
----------
original_series : np.ndarray
The 1D input time series to create surrogates from.
num_surrogates : int, optional
The number of surrogate series to generate. Defaults to 1.
max_iter : int, optional
Maximum number of iterations for the algorithm per surrogate.
Defaults to 1000.
tol : float, optional
Tolerance for convergence. The iteration for a surrogate stops if
the relative change in spectrum error is less than this value.
Defaults to 1e-8.
n_jobs : int, optional
The number of CPU cores to use for parallel generation. -1 means
using all available cores. If set to 1, runs on a single thread.
Defaults to -1.
Returns
-------
np.ndarray or list[np.ndarray]
- If num_surrogates is 1 (default), returns a single NumPy array.
- If num_surrogates > 1, returns a list of NumPy arrays.
Notes
-----
The Iterative Amplitude Adjusted Fourier Transform (IAAFT) algorithm
iteratively adjusts the surrogate's amplitudes and power spectrum to
match the original series [2]. It is used to test against the null hypothesis
of a stationary linear stochastic process with a possibly non-Gaussian
distribution of values. This implementation uses Python's `multiprocessing`
module to parallelize the generation of multiple surrogates.
References
----------
[2] Schreiber, T., & Schmitz, A. (2000). Surrogate time series.
Physica D: Nonlinear Phenomena, 142(3-4), 346-382."""
original_series = np.asarray(original_series)
if original_series.ndim != 1:
raise ValueError("Input 'original_series' must be a one-dimensional array.")
if n_jobs == -1:
n_jobs = multiprocessing.cpu_count()
tasks = [(original_series, max_iter, tol, i) for i in range(num_surrogates)]
if num_surrogates == 1 or n_jobs == 1:
logger.info(f"Generating {num_surrogates} surrogate(s) on a single core...")
surrogate_list = [_iaaft_worker(task) for task in tasks]
else:
logger.info(f"Generating {num_surrogates} surrogate(s) in parallel using {n_jobs} cores...")
with multiprocessing.Pool(processes=n_jobs) as pool:
surrogate_list = pool.map(_iaaft_worker, tasks)
return surrogate_list[0] if num_surrogates == 1 else surrogate_list