Demonstration of lazy load and proxy objects

Import our packages first. It is often nice to have units, so we will also import quantities.

import urllib
import neo
import quantities as pq
import numpy as np

Let’s get a file. NeuralEnsemble maintains a wide variety of small test datasets that are free to use. We can use urllib to pull down one of these files.

# Import the submodule explicitly: a bare `import urllib` does not guarantee
# that `urllib.request` is loaded, so relying on it only works when some other
# package happens to have imported it already.
import urllib.request

# Base URL of the NeuralEnsemble test-data repository
url_repo = "https://web.gin.g-node.org/NeuralEnsemble/ephy_testing_data/raw/master/"
# Get the Micromed file: build the remote URL and choose where to save it locally
distantfile = url_repo + "micromed/File_micromed_1.TRC"
localfile = "./File_micromed_1.TRC"
# Download the remote file to the local path
urllib.request.urlretrieve(distantfile, localfile)
('./File_micromed_1.TRC', <http.client.HTTPMessage object at 0x7f2a8cd85690>)

Create a reader. Creating a reader for neo is easy: it just requires using the name of the desired reader and providing either a filename or a directory name (reader dependent). Since we got a Micromed file, we will use MicromedIO.

# Build the reader for the Micromed .TRC file that was downloaded above
reader = neo.MicromedIO(filename="File_micromed_1.TRC")
# NOTE(review): presumably this populates `reader.header`, which is printed
# below -- confirm whether the IO class already parses it on construction.
reader.parse_header()

As always, we can view some interesting information about the metadata and structure of a file just by printing the reader and its header.

# Printing the reader gives a short summary (blocks, segments, streams, channels)
print(reader)
# The header holds the same structure in full detail
print(f"Header information: {reader.header}")
MicromedIO: File_micromed_1.TRC
nb_block: 1
nb_segment:  [1]
signal_streams: [Signals (chans: 64)]
signal_channels: [FP1, FP2, AF7, AF3 ... Trig , VEOG , HEOG , EMG]
spike_channels: []
event_channels: [Trigger, Note, Event A, Event B]

Header information: {'nb_block': 1, 'nb_segment': [1], 'signal_streams': array([('Signals', '0')], dtype=[('name', '<U64'), ('id', '<U64')]), 'signal_channels': array([('FP1', '0', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('FP2', '1', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('AF7', '2', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('AF3', '3', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('AFz', '4', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('AF4', '5', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('AF8', '6', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('F7', '7', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('F5', '8', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('F3', '9', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('F1', '10', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('Fz', '11', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('F2', '12', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('F4', '13', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('F6', '14', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('F8', '15', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('FT7', '16', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('FC5', '17', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('FC3', '18', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('FC1', '19', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('FCz', '20', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('FC2', '21', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('FC4', '22', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('FC6', '23', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('FT8', '24', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('T7', '25', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('C5', '26', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('C3', '27', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('C1', '28', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('Cz', '29', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('C2', '30', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('C4', '31', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('C6', '32', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('T8', '33', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('TP7', '34', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('CP5', '35', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('CP3', '36', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('CP1', '37', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('CPz', '38', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('CP2', '39', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('CP4', '40', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('CP6', '41', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('TP8', '42', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('T5', '43', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('P5', '44', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('P3', '45', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('P1', '46', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('Pz', '47', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('P2', '48', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('P4', '49', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('P6', '50', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('T6', '51', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('PO7', '52', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('PO3', '53', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('POz', '54', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('PO4', '55', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('PO8', '56', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('O1', '57', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('Oz', '58', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('O2', '59', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('Trig', '60', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('VEOG', '61', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('HEOG', '62', 256., 'u2', 'uV', 0.09765625, -3200., '0'),
       ('EMG', '63', 256., 'u2', 'uV', 0.09765625, -3200., '0')],
      dtype=[('name', '<U64'), ('id', '<U64'), ('sampling_rate', '<f8'), ('dtype', '<U16'), ('units', '<U64'), ('gain', '<f8'), ('offset', '<f8'), ('stream_id', '<U64')]), 'spike_channels': array([],
      dtype=[('name', '<U64'), ('id', '<U64'), ('wf_units', '<U64'), ('wf_gain', '<f8'), ('wf_offset', '<f8'), ('wf_left_sweep', '<i8'), ('wf_sampling_rate', '<f8')]), 'event_channels': array([('Trigger', '', b'event'), ('Note', '', b'event'),
       ('Event A', '', b'epoch'), ('Event B', '', b'epoch')],
      dtype=[('name', '<U64'), ('id', '<U64'), ('type', 'S5')])}

Now let’s make a function that we will apply in order to compare lazy vs eager uses of the API.

def apply_my_fancy_average(sig_list):
    """Average signal chunks over triggers, then over channels.

    neo tracks units with quantities, so every chunk in `sig_list`
    exposes its plain numpy data through `.magnitude`. The chunks are
    reduced to those bare arrays first so that `np.stack` can be used.
    """
    # One bare array per chunk, stacked along a new leading axis
    stacked = np.stack([chunk.magnitude for chunk in sig_list], axis=0)
    # Collapse the leading (per-chunk) axis first ...
    per_trigger_mean = np.mean(stacked, axis=0)
    # ... then collapse axis 1 of what remains
    return np.mean(per_trigger_mean, axis=1)

Let’s set our limits for both cases. We will use quantities to include time dimensions.

# Time window limits, carrying units through quantities
lim_start = -20 * pq.ms  # window opens 20 milliseconds before
lim_end = 20 * pq.ms  # window closes 20 milliseconds after

We start with eager loading (where lazy=False). Everything is loaded into memory. We will read a segment of data. This includes analog signal data and events data (the final contents of a segment depend on the underlying IO being used).

# Eager read: everything in the segment is loaded into memory right away
seg = reader.read_segment(lazy=False)
triggers = seg.events[0]
anasig = seg.analogsignals[0]  # the whole recording already sits in memory

# Cut one window per trigger out of the in-memory signal
all_sig_chunks = [
    anasig.time_slice(trigger_time + lim_start, trigger_time + lim_end)
    for trigger_time in triggers.times
]

# With all the data in memory and every trigger window collected,
# finish by computing the average
m1 = apply_my_fancy_average(all_sig_chunks)

Here we do lazy=True, i.e. we do lazy loading. We only load the data that we want into memory and we use a proxy object for our analogsignal until we load it chunk by chunk (no running out of memory!)

# Lazy read: data are not loaded until explicitly requested
seg = reader.read_segment(lazy=True)
triggers = seg.events[0].load(time_slice=None)  # loads all triggers into memory
anasigproxy = seg.analogsignals[0]  # a proxy, no signal data loaded yet

# The real data are loaded here, but only one trigger window at a time,
# which reduces the memory strain
all_sig_chunks = [
    anasigproxy.load(time_slice=(trigger_time + lim_start, trigger_time + lim_end))
    for trigger_time in triggers.times
]

# Apply the same average as in the eager case
m2 = apply_my_fancy_average(all_sig_chunks)

We see that either way the result is the same, but with lazy loading we do not exhaust our RAM/memory.

# Print both averages so the eager and lazy results can be compared
print(f"Eagerly loading data and averaging: {m1}")
print(f"Lazy loading data and average {m2}")
Eagerly loading data and averaging: [28.009033   19.662857   26.106644   26.94664    14.271545    5.2375793
  0.71487427  3.2821655  11.3471985   3.28331   ]
Lazy loading data and average [28.009033   19.662857   26.106644   26.94664    14.271545    5.2375793
  0.71487427  3.2821655  11.3471985   3.28331   ]

Total running time of the script: (0 minutes 2.244 seconds)

Gallery generated by Sphinx-Gallery