'Preserving Data Groupings when writing to Worksheet using OpenPyxl

Disclaimer: I am a beginner-level, self-taught casual 'programmer' with Python.

Background: I have a script that takes Groupings of data from an .xlsx Workbook and writes them to separate Worksheets of a seperate .xlsx like so -

Before state (original_data.xlsx): you will note each top-level is a Client, underneath sits multiple levels of underyling data, pertaining to it's parent client. enter image description here

After state (split_data.xlsx): you will note each client and their underlying data is written to an identically named Worksheet in a new .xlsx file like so:
enter image description here

Issue: you will note from the After state that the Groupings / Levels have been lost, whilst all the data is present and has different levels of indentation.

Does anyone know how I might enhance my script to ensure that the Groupings / Levels are preserved? E.g., so you can still expand each Grouping (e.g., Client A) like so: enter image description here

My script: here is the script (sorry it's messy!) which achieves the above, minus the preserved Grouping/Levels.

import openpyxl

from copy import copy
from openpyxl import load_workbook

columns=['A','B','C','D','E','F','G','H','I','J','K','L']
def copy_cell(ws, row,ws_row,ws1):
    for col in columns:
        ws_cell=ws1[col+str(ws_row)]
        new_cell = ws[col+str(row)]

        if ws_cell.has_style:
            new_cell.font = copy(ws_cell.font)
            new_cell.border = copy(ws_cell.border)
            new_cell.fill = copy(ws_cell.fill)
            new_cell.number_format = copy(ws_cell.number_format)
            new_cell.protection = copy(ws_cell.protection)
            new_cell.alignment = copy(ws_cell.alignment)

wb1 = openpyxl.load_workbook('original_data.xlsx')

ws1=wb1.active

indexs=[]
clients=[]

index=1

while ws1['A'+str(index)]:
    if str(ws1['A'+str(index)].alignment.indent)=='0.0':
        indexs.append(index)
        clients.append(ws1['A'+str(index)].value)
    if ws1['A'+str(index)].value  is None:
        indexs.append(index)
        break
    index+=1

wb1.close()

wb = openpyxl.Workbook()

ws=wb.active

start_index=1

headers=['Ownership Structure', 'Fee Schedule', 'Management Style', 'Advisory Firm', 'Inception Date', 'Days in Time Period', 'Adjusted Average Daily Balance (No Div, USD)', 'Assets Billed On (USD)',
'Effective Billing Rate', 'Billing Fees (USD)', 'Bill To Account', 'Model Type']

for y,index in enumerate(indexs):
    try:
        client=0
        if len(clients[y])>=32:
            client=clients[y][:31]
        else:
            client=clients[y]
        wb.create_sheet(client)
        ws=wb[client]
        ws.column_dimensions['A'].width=35
        ws.append(headers)
        row_index=2
        for i in range(start_index,indexs[y+1]):
            ws.append([ws1[col+str(i)].value for col in columns])
            copy_cell(ws,row_index,i,ws1)
            row_index+=1
        start_index=indexs[y+1]
    except:
        pass

wb.save('split_data.xlsx')
wb.close()

try:
    wb1 = openpyxl.load_workbook('split_data.xlsx')
    a=wb1['Sheet']
    wb1.remove(a)
    a=wb1['Sheet1']
    wb1.remove(a)
    wb1.save('split_data.xlsx')
    wb1.close()
except:
    pass

Resources: here is a link to some test data (original_data.xlsx)



Solution 1:[1]

from openpyxl import load_workbook


def get_client_rows(sheet):
    """Get client rows.

    Skip header and then look for row dimensions without outline level
    """
    return [row[0].row for row in sheet.iter_rows(2) if row[0].alignment.indent == 0.0]
    return [
        row_index
        for row_index, row_dimension in sheet.row_dimensions.items()
        if row_index > 1 and row_dimension.outline_level == 0
    ]


def delete_client_block(sheet, start, end):
    """
    Delete rows starting from up to and including end.
    """
    for row in range(start, end + 1):
        sheet.row_dimensions.pop(row, None)
    sheet.delete_rows(start, end - start + 1)


def split_workbook(input_file, output_file):
    """
    Split workbook each main group into its own sheet.

    Not too loose any formatting we copy the current sheet and remove all rows
    which do not belong to extacted group.
    """

    try:
        workbook = load_workbook(input_file)
        data_sheet = workbook.active
        client_rows = get_client_rows(data_sheet)

        for index, client_row in enumerate(client_rows):
            # create new sheet for given client, shorten client as it might be too long
            client_sheet = workbook.copy_worksheet(data_sheet)
            client_sheet.title = data_sheet.cell(client_row, 1).value[:32]

            # delete rows after current client if available
            if index < len(client_rows) - 1:
                row_after_client = client_rows[index + 1]
                delete_client_block(
                    client_sheet, row_after_client, client_sheet.max_row
                )

            # delete rows before current client if available
            if index > 0:
                first_client_row = client_rows[0]
                delete_client_block(
                    client_sheet, first_client_row, client_row - first_client_row + 1
                )

                # move left over dimensions to top of the sheet
                for row_index in list(client_sheet.row_dimensions.keys()):
                    # skip header row dimension
                    if row_index > first_client_row - 1:
                        row_dimension = client_sheet.row_dimensions.pop(row_index)
                        new_index = row_index - client_row + first_client_row
                        row_dimension.index = new_index
                        client_sheet.row_dimensions[new_index] = row_dimension

        del workbook[data_sheet.title]
        workbook.save(output_file)
    finally:
        workbook.close()


if __name__ == "__main__":
    input_file = 'data.xlsx'
    output_file = "data_output.xlsx"

    split_workbook(input_file, output_file)

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1 William