Running a Python script with multiple values of command-line arguments

I have a Python script for pre-processing audio that takes the frame length, frame step and FFT length as command-line arguments. I am able to run the code when each of these arguments has a single value. Is there a way to run the script with multiple values of an argument? For example, I would like to get the output for FFT lengths of 128, 256 and 512 instead of just one value.

The code for pre-processing is as follows:

import numpy as np
import pandas as pd
import tensorflow as tf
from scipy.io import wavfile
import os
import time
import pickle
import random
import argparse

import configlib
from configlib import config as C

import mfccwithpaddingandcmd


from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.preprocessing import MultiLabelBinarizer
from tensorflow import keras
from tensorflow.python.keras import Sequential
from tensorflow.keras.layers import Dense,Conv2D,MaxPooling2D,Flatten,Dropout,BatchNormalization,LSTM,Lambda,Reshape,Bidirectional,GRU
from tensorflow.keras.callbacks import TensorBoard

# Wall-clock reference taken when the module is loaded; pp() reports its
# feature-extraction time relative to this value.
start = time.time()

# Class labels. pp() reads one sub-directory per label under the --dire directory.
classes = ['blinds','fan','light','music','tv']
#dire = r"/mnt/beegfs/home/gehani/test_speech_command/"

# Register this module's command-line options as a group on the shared configlib parser.
parser = configlib.add_parser("Preprocessing config")
parser.add_argument("-dir","--dire", metavar="", help="Directory for the audio files")


def pp():
  """Extract features for every audio file under C["dire"] and pickle them.

  For each label in the module-level ``classes`` list the matching
  sub-directory of ``C["dire"]`` is listed, every ``.wav`` file is passed
  through ``mfccwithpaddingandcmd.mfcc`` and the results are collected.
  The feature array is written to ``A_all.pickle`` and the binarized labels
  to ``Y_all.pickle`` in the current working directory. Finally the data is
  split 80/10/10 into train/validation/test sets and their shapes are printed.

  ``configlib.parse()`` must have been called first so that ``C["dire"]``
  (and the MFCC options read by ``mfcc``) are populated.

  Returns:
    None. Results are delivered through the pickle files and stdout.
  """
  # data_list holds one list of relative file paths per class.
  data_list = []
  for label in classes:
    class_list = []
    if label == 'silence':
      # NOTE(review): 'silence' is not in the `classes` list defined in this
      # module, so this branch only runs if that list is extended.
      # Cut the background-noise recordings into one-second clips stored
      # under <dire>/silence.
      silence_path = os.path.join(C["dire"], 'silence')
      if not os.path.exists(silence_path):
        os.mkdir(silence_path)
      silence_stride = 2000
      folder = os.path.join(C["dire"], '_background_noise_')

      for file_ in os.listdir(folder):
        if '.wav' in file_:
          load_path = os.path.join(folder, file_)
          sample_rate, y = wavfile.read(load_path)
          for i in range(0, len(y) - sample_rate, silence_stride):
            file_path = "silence/{}_{}.wav".format(file_[:-4], i)
            y_slice = y[i:i + sample_rate]
            wavfile.write(os.path.join(C["dire"], file_path), sample_rate, y_slice)
            class_list.append(file_path)
    else:
      folder = os.path.join(C["dire"], label)
      for file_ in os.listdir(folder):
        # Relative path of the form <label>/<file name>.
        file_path = '{}/{}'.format(label, file_)
        class_list.append(file_path)

    random.shuffle(class_list)
    data_list.append(class_list)

  # X collects one feature matrix per file, Y the matching class name.
  X = []
  Y = []
  preemphasis = 0.985
  print("Feature Extraction Started")
  for class_list in data_list:
    for samples in class_list:
      if samples.endswith('.wav'):
        sample_rate, audio = wavfile.read(os.path.join(C["dire"], samples))
        if audio.size < sample_rate:
          # Left-pad clips shorter than one second with zeros.
          audio = np.pad(audio, (sample_rate - audio.size, 0), mode="constant")
        coeff = mfccwithpaddingandcmd.mfcc(audio, sample_rate, preemphasis)
        X.append(coeff)
        # NOTE(review): a sample whose folder matches neither case below gets
        # a feature row in X but no label in Y.
        folder_name = samples.split('/')[0]
        if folder_name in classes:
          Y.append(folder_name)
        elif folder_name == '_background_noise_':
          Y.append('silence')

  # Copy the list of feature matrices into a single 3-D array.
  A = np.zeros((len(X), X[0].shape[0], X[0][0].shape[0]), dtype='object')
  for i in range(0, len(X)):
    A[i] = np.array(X[i])

  end1 = time.time()
  print("Time taken for feature extraction:{}sec".format(end1 - start))

  # Turn the class names into a binary indicator matrix.
  MLB = MultiLabelBinarizer()
  MLB.fit(pd.Series(Y).fillna("missing").str.split(', '))
  Y_MLB = MLB.transform(pd.Series(Y).fillna("missing").str.split(', '))
  print(Y_MLB.shape)

  # `with` guarantees each file is flushed and closed, including the read
  # handle that was previously left open.
  with open("A_all.pickle", "wb") as pickle_out:
    pickle.dump(A, pickle_out)

  with open("Y_all.pickle", "wb") as pickle_out:
    pickle.dump(Y_MLB, pickle_out)

  with open("Y_all.pickle", "rb") as pickle_in:
    Y = pickle.load(pickle_in)

  # NOTE(review): this normalizes the Python list X, not the array A built
  # above - confirm that is intended.
  X = tf.keras.utils.normalize(X)
  X_train, X_valtest, Y_train, Y_valtest = train_test_split(X, Y, test_size=0.2, random_state=37)
  X_val, X_test, Y_val, Y_test = train_test_split(X_valtest, Y_valtest, test_size=0.5, random_state=37)

  print(X_train.shape, X_val.shape, X_test.shape, Y_train.shape, Y_val.shape, Y_test.shape)

if __name__ == "__main__":
  # Parse the command line into the shared config and save the raw
  # arguments to last_arguments.txt.
  configlib.parse(save_fname="last_arguments.txt")
  print("Running with configuration:")
  configlib.print_config()
  # The config must be populated before pp() reads C["dire"].
  pp()

The code for MFCC is as follows:

import tensorflow as tf
import scipy.io.wavfile as wav
import numpy as np
import matplotlib.pyplot as plt
import pickle
import argparse

import configlib
from configlib import config as C

# Configuration arguments
# Registered as their own group on the shared configlib parser; mfcc() reads
# the parsed values back through C["frame_length"], C["frame_step"] and
# C["fft_length"].
parser = configlib.add_parser("MFCC config")

parser.add_argument("-fl","--frame_length", type=int, default=400, metavar="", help="Frame Length")
parser.add_argument("-fs","--frame_step", type=int, default=160, metavar="", help="Frame Step")
parser.add_argument("-fft","--fft_length", type=int, default=512, metavar="", help="FFT length")

#args = parser.parse_args()

def Preemphasis(signal,pre_emp):
  """Apply a first-order pre-emphasis filter to `signal`.

  The first element is kept unchanged; every later element has `pre_emp`
  times its predecessor subtracted from it.

  Args:
    signal: NumPy array of samples.
    pre_emp: Filter coefficient multiplied with the preceding sample.

  Returns:
    A flattened NumPy array holding the filtered samples.
  """
  leading = signal[0]
  # Vectorized difference: each element minus the scaled previous element.
  emphasized_rest = signal[1:] - pre_emp * signal[:-1]
  return np.append(leading, emphasized_rest)
  
def Paddinggg(framelength,framestep,samplerate):
  """Return by how many samples the first over-running frame exceeds `samplerate`.

  Frames start at 0, framestep, 2*framestep, ... (all below `samplerate`)
  and each spans `framelength` samples. Among the frames whose end lies
  beyond `samplerate`, the smallest overshoot is returned.

  Args:
    framelength: Number of samples per frame.
    framestep: Hop between consecutive frame starts.
    samplerate: Number of samples the frames are laid over.

  Returns:
    The smallest overshoot in samples, or 0 when every frame already ends
    at or before `samplerate`.
  """
  frame_ends = np.arange(0, samplerate, framestep) + framelength
  overshooting = frame_ends[frame_ends > samplerate]
  if overshooting.size == 0:
    # No frame runs past the end, so no padding is needed. Previously this
    # case crashed with "min() arg is an empty sequence".
    return 0
  return overshooting.min() - samplerate
 

def mfcc(audio,sample_rate,pre_emp):
  """Return the log-magnitude mel spectrogram of `audio`.

  Despite the name, no cepstral (DCT) step is applied: the result is the
  stabilized log of a 32-bin mel spectrogram. Frame length, frame step and
  FFT length are read from the shared config `C`.

  Args:
    audio: NumPy array of audio samples.
    sample_rate: Sampling rate in Hz; also sets the upper mel edge
      (sample_rate / 2).
    pre_emp: Pre-emphasis coefficient handed to `Preemphasis`.

  Returns:
    A TensorFlow tensor of log-mel values.
  """
  frame_length = C["frame_length"]
  frame_step = C["frame_step"]

  # Reflect-pad the start of the signal by the amount Paddinggg reports.
  left_pad = Paddinggg(frame_length, frame_step, sample_rate)
  audio = np.pad(audio, (left_pad, 0), mode='reflect')
  audio = audio.astype('float32')
  # Normalize, then pre-emphasize.
  audio = tf.keras.utils.normalize(audio)
  audio = Preemphasis(audio, pre_emp)

  # Short-time Fourier transform with a Hann window; keep magnitudes only.
  stfts = tf.signal.stft(
      audio,
      frame_length,
      frame_step,
      C["fft_length"],
      window_fn=tf.signal.hann_window,
  )
  magnitudes = tf.abs(stfts)

  # Project the linear-frequency bins onto 32 mel bins spanning 0 Hz .. Nyquist.
  mel_weights = tf.signal.linear_to_mel_weight_matrix(
      32, stfts.shape[-1], sample_rate, 0.0, sample_rate/2.0)
  mel = tf.tensordot(magnitudes, mel_weights, 1)
  mel.set_shape(magnitudes.shape[:-1].concatenate(mel_weights.shape[-1:]))

  # The small offset keeps the log finite where the mel energy is zero.
  return tf.math.log(mel + 1e-6)

# Module-level statement: runs whenever this module is imported or executed.
print("End")

And the code for configlib is as follows:

from typing import Dict, Any
import logging
import pprint
import sys
import argparse

# Logging for config library
logger = logging.getLogger(__name__)

# Our global parser that we will collect arguments into.
# fromfile_prefix_chars="@" lets a command-line token such as @args.txt be
# replaced by the arguments stored in that file.
parser = argparse.ArgumentParser(description=__doc__, fromfile_prefix_chars="@")

# Global configuration dictionary that will contain parsed arguments
# It is also this variable that modules use to access parsed arguments
# (empty until parse() has been called).
config:Dict[str, Any] = {}


def add_parser(title: str, description: str = ""):
    """Add a titled argument group to the shared parser and return it.

    Args:
        title: Heading of the group.
        description: Optional text for the group.

    Returns:
        The argument group; call ``add_argument`` on it to register options.
    """
    group = parser.add_argument_group(title, description)
    return group


def parse(save_fname: str = "") -> Dict[str, Any]:
    """Parse the command line into the shared ``config`` dictionary.

    Args:
        save_fname: If non-empty, the raw arguments (``sys.argv[1:]``) are
            written to this file, one per line.

    Returns:
        The module-level ``config`` dictionary, updated in place with the
        parsed values.
    """
    config.update(vars(parser.parse_args()))
    # Log through this module's own logger rather than the root logger, so
    # the messages carry the module name and calling parse() does not
    # implicitly configure root logging.
    logger.info("Parsed %i arguments.", len(config))
    # Optionally save passed arguments
    if save_fname:
        with open(save_fname, "w") as fout:
            fout.write("\n".join(sys.argv[1:]))
        logger.info("Saving arguments to %s.", save_fname)
    return config


def print_config():
    """Pretty-print the shared configuration dictionary to stdout."""
    printer = pprint.PrettyPrinter()
    printer.pprint(config)

I use the following command to run my python file:

python3.7 preprocessingwithpaddingandcmd.py -fl 1103 -fs 88 -fft 512 -dir /mnt/beegfs/home/gehani/appliances_audio_one_channel

Should I be writing a shell script or python has some options for it?

EDIT 1

I tried using

parser.add_argument('-fft', '--fft_length', type=int, default=[], nargs=3) 

for getting fft length from the command line and used the command

run preprocessingwithpaddingandcmd -dir filepath -fl 1765 -fs 1102 -fft 512 218 64

to run it. But it gives me this error: `ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()`

Can anyone please help?



Solution 1:[1]

I found you can do it with the following MFCC feature extraction.
You can write your own MFCC feature extraction, or you can limit the window lengths and the number of cepstra, which is enough for simple work, unless you need logarithmic scales, in which case you can use a target matrix (convolution) or something else.
The values are logarithmic when you use an FFT or an alternative derivation, but MFCC is only a feature extraction; I provide the sample output in the picture below.

[ Sample ]:

from python_speech_features import mfcc
from python_speech_features import logfbank
import scipy.io.wavfile as wav

import tensorflow as tf
import matplotlib.pyplot as plt

# Read the WAV file: the sample rate and the sample array.
(rate,sig) = wav.read("F:\\temp\\Python\\Speech\\temple_of_love-sisters_of_mercy.wav")
# MFCC features with every option spelled out, including the FFT size (nfft=512).
mfcc_feat = mfcc(signal=sig, samplerate=rate, winlen=0.025, winstep=0.01, numcep=13, nfilt=26, nfft=512, lowfreq=0, highfreq=None, preemph=0.97, ceplifter=22, appendEnergy=True)
# Log filterbank features with the library defaults (not used further below).
fbank_feat = logfbank(sig,rate)

# Plot column 0 of the MFCC matrix for rows 50 up to 42000.
plt.plot( mfcc_feat[50:42000,0] )
plt.xlabel("sample")
plt.show()
plt.close()

# Wait for Enter before the script exits.
input('...')

Sample

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1 Martijn Pieters