Is there any way to solve this re.sub issue?

sub() missing 1 required positional argument: 'string'

def preprocess_text(sentence):
    """Reduce *sentence* to ASCII letters separated by single spaces.

    Every character outside ``a-z``/``A-Z`` is turned into a space,
    one-letter words that have whitespace on both sides are dropped, and
    each run of whitespace is collapsed to a single space.  The result is
    not stripped, so it may still start or end with one space, and a
    single letter at the very start or end of the text is kept.

    Args:
        sentence: The text to clean (a ``str``).

    Returns:
        The cleaned string.
    """
    # Remove punctuation and numbers (anything that is not an ASCII letter).
    sentence = re.sub(r'[^a-zA-Z]', ' ', sentence)

    # Single character removal.  The whitespace after the letter is only
    # looked at, not consumed, so it can still act as the leading
    # whitespace of the next single letter ("a b c" runs are fully removed).
    sentence = re.sub(r'\s+[a-zA-Z](?=\s)', '', sentence)

    # Removing multiple spaces.
    # NOTE: pattern and replacement must be two separate arguments.  Joining
    # them with "+" builds one string and re.sub() then raises
    # "missing 1 required positional argument: 'string'".
    sentence = re.sub(r'\s+', ' ', sentence)

    return sentence

# Matches one markup tag: "<", one or more characters other than ">", then ">".
TAG_RE = re.compile(r'<[^>]+>')


def remove_tags(text):
    """Return *text* with every substring matched by ``TAG_RE`` deleted."""
    without_tags = TAG_RE.sub('', text)
    return without_tags

# Clean every entry stored under the 'Görüş' key of product_reviews
# (presumably a pandas DataFrame column of review texts -- confirm).
sentences = list(product_reviews['Görüş'])
X = [preprocess_text(sentence) for sentence in sentences]

# Look at one cleaned entry; requires at least 82 items in X.
X[81]

Solution 1:^[1]

After the comment #Removing multiple spaces there is a + where there should be a comma between the regex pattern and the replacement string.

sentence = re.sub(r'\s+'+' ',sentence)

should be

sentence = re.sub(r'\s+',' ',sentence)

Solution 2:^[2]

Your code is missing the initialisation of certain variables. I have slightly modified it to run on its own.

import re
def preprocess_text(sentence):
    """Keep only ASCII letters in *sentence*, separated by single spaces.

    Non-letter characters become spaces, one-letter words surrounded by
    whitespace are removed, and whitespace runs are collapsed to one space.
    The result is not stripped, and a single letter at the very start or
    end of the text is left in place.

    Args:
        sentence: The text to clean (a ``str``).

    Returns:
        The cleaned string.
    """
    # Remove punctuation and numbers (anything that is not an ASCII letter).
    sentence = re.sub(r'[^a-zA-Z]', ' ', sentence)
    # Single character removal.  A lookahead checks the following whitespace
    # without consuming it, so several single letters in a row are all
    # removed instead of every second one being skipped.
    sentence = re.sub(r'\s+[a-zA-Z](?=\s)', '', sentence)

    # Removing multiple spaces.
    # NOTE: re.sub() takes pattern, replacement and string as three separate
    # arguments; r'\s+' + ' ' would merge the first two into one.
    sentence = re.sub(r'\s+', ' ', sentence)
    return sentence
# Pattern for a single tag: "<", at least one non-">" character, then ">".
TAG_RE = re.compile(r'<[^>]+>')


def remove_tags(text):
    """Delete every ``TAG_RE`` match from *text* and return the result."""
    return re.sub(TAG_RE, '', text)
# Two hard-coded samples replace the original data source so that the
# snippet runs on its own.
sentences = ['test1 a &_€', 'test   2']
# sentences = list(product_reviews['Görüş'])
X = [preprocess_text(sentence) for sentence in sentences]
# Print both cleaned samples, one per line.
print(X[0], X[1], sep='\n')

output

test
test

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution	Source
Solution 1
Solution 2

Is there any way to solve this re.sub issue?

Solution 1:[1]

Solution 2:[2]

Sources

Related Questions

Solution 1:^[1]

Solution 2:^[2]