'Python 3 RuntimeWarning: overflow encountered in double-scalars - trying to fit multiple gaussians and an offset using scipy.optimise.curve_fit

Hi all as with many peeps, I am new to python.

Updated script that runs to completion but has a OptimizeWarning: Covariance of the parameters could not be estimated error:

I changed the initial guesses into just integers, and added the plot and printing of the data I want.

import pandas as pd
from pandas import Series, DataFrame
import numpy as np
from scipy.optimize import curve_fit
import pylab as plb
import csv
import matplotlib.pyplot as plt

def Two_Gauss_Fit(filename, xStart, xEnd):

    # Choose which file to import data from

    #filename = 'ch12_Fe55_11kcps_10s.csv'

    # Read in data 

    data = np.loadtxt(filename, skiprows=6, delimiter=',')

    # make into lists

    x1 = data[:,0].tolist()
    y1 = data[:,1].tolist()
    z1 = data[:,2].tolist()

    # Select data range

    XStart = x1.index(xStart) 
    XEnd = x1.index(xEnd)
    
   
    #Transform data into integers 

    IGListX = [int(i) for i in x1[XStart:XEnd]]
    IGListY = [int(i) for i in z1[XStart:XEnd]]

    # then into numpy arrays

    IGArrX = np.array(IGListX)
    IGArrY = np.array(IGListY)
    

    # amp = amplitude, x = x axis, centre = centre of Gaussian, sigma = standard diviation

    def two_gauss(x, amp1, centre1, sigma1, amp2, centre2, sigma2, offset):
        y = ((amp1 * np.exp(-(x-centre1)**2 / (2**sigma1 **2))) + (amp2 * np.exp(-(x-centre2)**2 / (2**sigma2 **2))) + offset)
        return y
    
    def two_gauss_fit(x, y):
        # Initial guesses
        amp1 = 100
        centre1 = 19000
        sigma1 = 500
        amp2 = 10
        centre2 = 20000
        sigma2 = 500
        offset = 20
    
        #mean1 = sum(y) / sum(y) # calculates the mean of the data - used for initial guesses
        #sigma1 = np.sqrt(sum(y * (x - mean1) ** 2) / sum(y)) # calculate the standard diviation, used for inital guesses
        popt, pcov = curve_fit(two_gauss,x,y) 
        return popt
        
    amp1, centre1, sigma1, amp2, centre2, sigma2, offset = two_gauss_fit(IGArrX,IGArrY) # Processes the array data from the CSV file using the gauss and gauss_fit functions
       # plot data
    FWHM1 = 2.35482 * sigma1 # caluclates full width half maximum using the standard diviation (FWHM = 2 root 2 ln 2 sigma = 2,35 * sigma)
    FWHM2 = 2.35482 * sigma2
    print('The FWHM of gaussian 1 is', FWHM1)
    print('The FWHM of gaussian 2 is', FWHM2)
    print('The offset of the gaussian baseline is', offset)
    print('The center of the gaussian fit 1 is', centre1)
    print('The center of the gaussian fit 2 is', centre2)


    plt.plot(IGArrX, IGArrY, '-k', label='data')
    plt.plot(IGArrX, two_gauss(IGArrX, *two_gauss_fit(IGArrX, IGArrY)), '--r', label='fit')


    plt.legend()
    plt.title('Gaussian fit,  $f(x) = A e^{(-(x-x_0)^2/(2sigma^2))}$')
    plt.xlabel('Photon energy (meV?)')
    plt.ylabel('Intensity (A)')

    #Show data below
    plt.show() 
     
    

I am using jupyter and Python 3 to try to fit two Gaussians, and I want to built it so it is easy for me to add more Gaussians later if I need to. I have looked at a few examples, but they don't make a lot of sense for me. I can fit one Gaussian successfully, but have now gotten the RuntimeWarning: overflow encountered in double-scalars. I think this is because the fit is going outside of the range due to bad initial guesses? I am not really sure.

Any pointers on this would be awesome, or if there is a better function/way to build the script so I am add more Gaussian later.

Thanks!

from pandas import Series, DataFrame
import numpy as np
from scipy.optimize import curve_fit
import pylab as plb
import csv
import matplotlib.pyplot as plt

def Two_Gauss_Fit(filename, xStart, xEnd):

    # Choose which file to import data from - TBA

    # Read in data 

    data = np.loadtxt(filename, skiprows=6, delimiter=',')

    # make into lists

    x1 = data[:,0].tolist()
    y1 = data[:,1].tolist()
    z1 = data[:,2].tolist()

    # Select data range

    XStart = x1.index(xStart) # Use stated x start for start of fitting (truncate the data)
    XEnd = x1.index(xEnd) # Used stated x end for end of fitting (truncate the data)
    
   
    #Transform data into integers 

    IGListX = [int(i) for i in x1[XStart:XEnd]]
    IGListY = [int(i) for i in z1[XStart:XEnd]]

    # then into numpy arrays

    IGArrX = np.array(IGListX)
    IGArrY = np.array(IGListY)
    

    # amp = amplitude, x = x axis, centre = centre of Gaussian, sigma = standard diviation, offset = linear baseline offset

#equation for two Gaussians and the offset

    def two_gauss(x, amp1, centre1, sigma1, amp2, centre2, sigma2, offset):
        return ((amp1 * np.exp(-(x-centre1)**2 / (2**sigma1 **2))) + (amp2 * np.exp(-(x-centre2)**2 / (2**sigma2 **2))) + offset)

#scipy curve fitting function with some initial guess calculations taken from the data

    def two_gauss_fit(x, y):
        mean1 = sum(x * y) / sum(y) # calculates the mean of the data - used for initial guesses
        sigma1 = np.sqrt(sum(y * (x - mean1) ** 2) / sum(y)) # calculate the standard diviation, used for inital guesses
        popt, pcov = curve_fit(two_gauss,x,y,p0=[min(x), max(x), mean1, sigma1, mean1, sigma1, 1])
        return popt
# get values from popt from fitting function
    amp1, centre1, sigma1, amp2, centre2, sigma2, offset = two_gauss_fit(IGArrX,IGArrY)

#Calculate the FWHM from the two fits
    FWHM1 = 2.35482 * sigma1 # caluclates full width half maximum using the standard diviation (FWHM = 2 root 2 ln 2 sigma = 2,35 * sigma)
    FWHM2 = 2.35482 * sigma2

    ```
    


Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source