Take average of every 2 consecutive elements and insert them back into array

I have an array and want to compute the average of every 2 consecutive numbers and insert it as an additional element between those 2 numbers. For example, if I start with

x = np.array([1, 3, 5, 7, 9])

I want to end up with

[1, 2, 3, 4, 5, 6, 7, 8, 9]

How would I go about doing this?



Solution 1:[1]

Here is a simple and efficient method using numpy.repeat:

x = np.array([1, 3, 5, 7, 9])

# Duplicate every element: in `xx`, adjacent entries are either the same value
# twice or two consecutive values of `x`, so halving the sum of adjacent
# entries alternates between an original value and a pair average.
xx = np.repeat(x, 2)
pair_sums = xx[:-1] + xx[1:]
pair_sums / 2
# array([1., 2., 3., 4., 5., 6., 7., 8., 9.])

# or if you want to preserve int dtype
pair_sums // 2
# array([1, 2, 3, 4, 5, 6, 7, 8, 9])

A simple benchmarking script:

import numpy as np
from numpy.lib import stride_tricks
from itertools import zip_longest

def Brandt(x, forceint=False):
    """Interleave `x` with the averages of its consecutive pairs.

    Parameters
    ----------
    x : 1-D numpy array of numbers.
    forceint : if True, the result is converted to an integer array.

    Returns
    -------
    numpy array laid out as [x[0], avg(x[0], x[1]), x[1], ..., x[-1]].
    """
    # Midpoint of each adjacent pair: x[i] + (x[i+1] - x[i]) / 2.
    y = np.diff(x) / 2 + x[:-1]
    # `y` is one element shorter than `x`, so zip_longest pads the final pair
    # with None.  Filter out only that padding: a plain truthiness test would
    # also discard legitimate zeros in the data or in the averages.
    z = [n for pair in zip_longest(x, y) for n in pair if n is not None]
    return np.asarray(z, int) if forceint else np.asarray(z)

def Ch3steR(x):
    """Insert the mean of each adjacent pair of `x` between its elements.

    The pairs are obtained as a strided (len(x) - 1, 2) view of `x`, their
    row means are computed, and np.insert places them into `x`.
    """
    step = x.strides[0]
    n_pairs = len(x) - 1
    # Using the same byte step along both axes makes row i equal to
    # (x[i], x[i + 1]).
    windows = stride_tricks.as_strided(
        x,
        shape=(n_pairs, 2),
        strides=(step, step),
    )
    pair_means = windows.mean(axis=1)
    insert_at = np.arange(1, len(x))
    return np.insert(x, insert_at, pair_means)

def moving_average(x, w):
    """Return the mean of each run of `w` consecutive elements of `x`."""
    # A 'valid' convolution with a length-`w` kernel of ones produces the sum
    # over every fully overlapping window; dividing by `w` turns it into a mean.
    window_sums = np.convolve(x, np.ones(w), mode='valid')
    return window_sums / w


def Tankred(x):
    """Insert the 2-element moving average of `x` between its elements."""
    pair_means = moving_average(x, 2)
    insert_at = np.arange(1, len(x))
    return np.insert(x, insert_at, pair_means)

def fskj(x):
    """Interleave `x` with the averages of its consecutive pairs.

    Every element except the last is paired with the average that follows it,
    the pairs are flattened, and the last element of `x` is appended.
    """
    head = x[:-1]
    midpoints = (head + x[1:]) / 2
    # Rows are (x[i], avg(x[i], x[i + 1])); flattening row by row interleaves them.
    pairs = np.stack((head, midpoints), axis=-1)
    return np.append(pairs.ravel(), x[-1])

def user1740577(x):
    """Insert the average of each consecutive pair of `x` between the pair.

    The array is rebuilt with one np.insert call per pair.  np.insert converts
    the inserted value to the dtype of `x`, so the result keeps that dtype.

    Parameters
    ----------
    x : 1-D numpy array of numbers.

    Returns
    -------
    numpy array of length 2 * len(x) - 1 (or `x` itself when it has fewer
    than two elements).
    """
    # An input of length n has n - 1 adjacent pairs.  After k insertions the
    # next untouched pair sits at positions 2k and 2k + 1 of the growing
    # array, so the left index runs 0, 2, ..., 2 * (n - 2).  The bound is
    # computed once from the original length.
    for i in range(0, 2 * (len(x) - 1), 2):
        x = np.insert(x, i + 1, np.average(x[i:i + 2]))
    return x
        
def loopywalt(x, forceint=False):
    """Interleave `x` with its consecutive-pair averages using repeat.

    With `forceint` the halving uses floor division (`//`); otherwise it uses
    true division (`/`).
    """
    doubled = x.repeat(2)
    # Adjacent entries of `doubled` are alternately (v, v) and (v, next v), so
    # half of their sum alternates between an original value and an average.
    neighbour_sums = doubled[:-1] + doubled[1:]
    if forceint:
        return neighbour_sums // 2
    return neighbour_sums / 2

# Benchmark driver: time every candidate on inputs of increasing size and
# print one line per candidate.
from timeit import timeit

all_ = (Brandt, Ch3steR, Tankred, fskj, user1740577, loopywalt)
# Candidates that proved too slow on a smaller input; skipped from then on.
blacklist = []
rng = np.random.default_rng(seed=1)
for ex in [
    np.array([1, 3, 5, 7, 9]),
    rng.integers(1, 1000, 1000),
    rng.integers(1, 1000, 1000000),
]:
    print()
    print("n =", len(ex))
    for method in all_:
        if method in blacklist:
            continue
        # Quick probe with 10 calls to decide how to measure this candidate.
        t = timeit(lambda: method(ex), number=10)
        if t >= 0.1:
            # Too slow for a full run: extrapolate the 10-call time to 1000
            # calls and skip this candidate on larger inputs.
            blacklist.append(method)
            t *= 100
        else:
            # Total seconds for 1000 calls is numerically the time per call
            # in milliseconds.
            t = timeit(lambda: method(ex), number=1000)
        print(method.__name__, t, 'ms')

Results:

n = 5
Brandt 0.018790690000969335 ms
Ch3steR 0.06143478500052879 ms
Tankred 0.039249178998943535 ms
fskj 0.026057840999783366 ms
user1740577 0.15504688399960287 ms
loopywalt 0.0033979790005105315 ms

n = 1000
Brandt 0.4772341360003338 ms
Ch3steR 0.10018322700125282 ms
Tankred 0.0674891500002559 ms
fskj 0.03475799899933918 ms
user1740577 17.72124929993879 ms
loopywalt 0.017431922000469058 ms

n = 1000000
Brandt 491.9887762000144 ms
Ch3steR 56.97805079998943 ms
Tankred 44.63849610001489 ms
fskj 25.709937600004196 ms
loopywalt 20.622111500051687 ms

Solution 2:[2]

You can use numpy.insert and a moving average to fill the missing values:

import numpy as np

x = np.array([1, 3, 5, 7, 9])

# copied from: https://stackoverflow.com/a/54628145/5665958
def moving_average(x, w):
    """Return the mean over every window of `w` consecutive elements of `x`."""
    # Convolving with `w` ones in 'valid' mode sums each fully overlapping
    # window; dividing by the window length gives the average.
    kernel = np.ones(w)
    window_sums = np.convolve(x, kernel, mode='valid')
    return window_sums / w

# Insert the 2-element moving averages of x before indices 1 .. len(x) - 1,
# i.e. one average between every pair of neighbouring elements.
x_filled = np.insert(x, np.arange(1, len(x)), moving_average(x, 2))

x_filled: array([1, 2, 3, 4, 5, 6, 7, 8, 9])

Solution 3:[3]

You can take advantage of numpy.lib.stride_tricks.as_strided (see note 1 below) to find the mean of every 2 consecutive values. Then use np.insert to insert those means into the array.

from numpy.lib import stride_tricks
x = np.array([1, 3, 5, 7, 9])
# Byte distance between consecutive elements of x.
strd = x.strides[0]
# Build a (len(x) - 1, 2) strided view of x. Both axes advance by one element,
# so row i is (x[i], x[i + 1]).
# NOTE: NumPy's documentation advises using as_strided with extreme care,
# because a wrong shape or stride reads memory outside the array.
vals = stride_tricks.as_strided(x, shape=(len(x) - 1, 2), strides=(strd, strd))

# print(vals)
# [[1 3]
#  [3 5]
#  [5 7]
#  [7 9]]

# Mean of each row, i.e. the average of every consecutive pair.
means = vals.mean(axis=1)
print(means)
# [2. 4. 6. 8.]

# Insert one mean before each of the indices 1 .. len(x) - 1 of x.
np.insert(x, np.arange(1, len(x)), means)
# array([1, 2, 3, 4, 5, 6, 7, 8, 9])

1. For more details about strides, see "How to understand NumPy strides for layman" and the related post by Rick M.

Solution 4:[4]

Simply put:

import numpy as np
x = np.array([1, 3, 5, 7, 9])

# Use `itertools.zip_longest` to wrap averages and inputs together
from itertools import zip_longest

# compute the averages (midpoint of each adjacent pair)
y = np.diff(x)/2 + x[:-1]

# mix them (order, in this case)
# `y` is one element shorter than `x`, so zip_longest pads the last pair with
# None. Test for None explicitly: a bare `if n` would also drop any 0 that
# occurs in the data or in the averages.
z = [n for pair in zip_longest(x,y) for n in pair if n is not None]

# make it a numpy-array (of ints)
np.asarray(z, int)
array([1, 2, 3, 4, 5, 6, 7, 8, 9])

Solution 5:[5]

Another fast version, slicing the original and the averages into a new array:

def Kelly(x):
    """Interleave `x` with its consecutive-pair averages via slice assignment.

    A result buffer is allocated with np.empty (default dtype); the averages
    go to the odd positions and the original values to the even positions.
    """
    midpoints = (x[1:] + x[:-1]) / 2
    out = np.empty(x.size + midpoints.size)
    # The two strided writes touch disjoint positions and together fill `out`.
    out[1::2] = midpoints
    out[0::2] = x
    return out

With loopy walt's benchmark (had to remove default_rng and use randint to get it to run):

n = 5
Brandt 0.023532982973847538 ms
Ch3steR 0.05084541701944545 ms
Tankred 0.029509164043702185 ms
fskj 0.01449447899358347 ms
user1740577 0.11903033603448421 ms
loopywalt 0.002962342055980116 ms
Kelly 0.004625919042155147 ms

n = 1000
Brandt 0.415388774999883 ms
Ch3steR 0.11717381200287491 ms
Tankred 0.07865125295938924 ms
fskj 0.026592836948111653 ms
user1740577 15.592256403760985 ms
loopywalt 0.02348607504973188 ms
Kelly 0.009647938015405089 ms

n = 1000000
Brandt 531.4903213002253 ms
Ch3steR 139.16819099686109 ms
Tankred 125.81092769978568 ms
fskj 63.73856549616903 ms
loopywalt 55.087829200783744 ms
Kelly 14.159472199389711 ms

Solution 6:[6]

import numpy as np
x = np.array([1, 3, 5, 7, 9])

# Average of every pair of consecutive values: [2, 4, 6, 8]
avg = (x[1:] + x[:-1]) / 2

# Pair each element of x (except the last) with the average that follows it:
# [[1, 2], [3, 4], [5, 6], [7, 8]]
zipped = np.stack((x[:-1], avg), axis=-1)

# Collapse the pairs into a 1-d array: [1, 2, 3, 4, 5, 6, 7, 8]
flattened = zipped.reshape(-1)

# Add the last element of x: [1, 2, 3, 4, 5, 6, 7, 8, 9]
requested_result = np.append(flattened, x[-1])

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1
Solution 2 Tankred
Solution 3
Solution 4
Solution 5 Kelly Bundy
Solution 6