'Print REGEX using USER DEFINED FUNCTION

I'm trying to print the variables ccb_3, nome, data, taxa and parcela using the function I defined as "ext_ccb", but when I run the code it returns 3 times (because I defined q as 3) the variable ccb_3.

I tried splitting it into 2 functions (one with the variable ccb_3 e one with the rest that uses REGEX) but it didn't worked to.

'''

from PyPDF2 import PdfFileReader, PdfFileWriter
import re

x = 1

q = 3

def ext_ccb():

  
    nome_ccb = str("Vazio (" + y + ").pdf")
    ccb = PdfFileReader(nome_ccb)
    ccb_obj_1 = ccb.getPage(0)
    ccb_text_1 = ccb_obj_1.extractText()
    ccb_obj_2 = ccb.getPage(1)
    ccb_text_2 = ccb_obj_2.extractText()

    ccb_3 = ccb_text_1[1:8]
    print(ccb_3)

    pattern_nome = re.compile(r'''[^\n][^CPF][A-Z](|\.)\w*\s*.*$

    Nome Completo
    ''', re.M)
    matches_nome = pattern_nome.finditer(ccb_text_1)
    for match in matches_nome:
        nome = str(match)
        nome = nome[40:].replace(r"\n\nNome Completo\n'>", "")
        print(nome)

    pattern_data = re.compile(r'''5\.2\. Modalidade

    \d{2}/\d{2}/\d{4}
    ''')
    matches_data = pattern_data.findall(ccb_text_1)
    for match in matches_data:
        data = match[17:27]
        print(data)

    pattern_taxa = re.compile(r'''Taxa de Juros a\.m\. \(%\)

    \d*,\d*''')
    matches_taxa = pattern_taxa.findall(ccb_text_2)
    for match in matches_taxa:
        taxa = match[24:]
        print(taxa)

    pattern_vparcela = re.compile(r'''Valor das Parcelas

    R\$ \d*,\d*''')
    matches_vparcela = pattern_vparcela.findall(ccb_text_2)
    for match in matches_vparcela:
        parcela = match[23:]
        print(parcela)

while x <= q:
    y = str(x)
    x += 1
    ext_ccb()

'''

What I really need is to insert it into an csv, multiple times from different PDF's, which I already have the code for:

'''

from csv import writer

x = 5
q = 0

while q < x:
    q += 1
    ccb_3 += 1
    nome += 2
    data += 4
    taxa += 4
    parcela += 5
    list_data = [ccb_3, nome, data, taxa, parcela]

    with open('csv_teste.csv', 'a', newline = '') as f_object:  
        writer_object = writer(f_object)
        writer_object.writerow(list_data)  
        f_object.close()

'''

How can I save each data from each PDF and put it into the CSV?



Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source