python實現excel公式格式化的示例代碼
之前跟一些小夥伴有個討論:
大概就是很多跟數據打交道的朋友都面對過很複雜的excel公式,有時嵌套層數特別多,肉眼觀看很容易蒙圈。
有了這樣的需求,我就有了解決問題的想法,說幹就幹,於是一個比較牛逼的excel公式格式化的工具就出現了。
效果體驗
先看看效果吧:
=IF(C11>100%*C4,IF(C11<=200%*C4,C11*50%-C4*15%,C11*60%-C4*35%),IF(C11<=C4*50%,C11*30%,C11*40%-C4*5%))
的格式化結果是:
=IF( C11>100%*C4, IF( C11<=200%*C4, C11*50%-C4*15%, C11*60%-C4*35% ), IF( C11<=C4*50%, C11*30%, C11*40%-C4*5% ) )
(SMA(MAX(CLOSE-DELAY(CLOSE,1),0),12,1)/SMA(ABS(CLOSE-DELAY(CLOSE,1)),12,1)*100-MIN(SMA(MAX(CLOSE-DELAY( CLOSE,1),0),12,1)/SMA(ABS(CLOSE-DELAY(CLOSE,1)),12,1)*100,12))/(MAX(SMA(MAX(CLOSE-DELAY(CLOSE,1),0),12, 1)/SMA(ABS(CLOSE-DELAY(CLOSE,1)),12,1)*100,12)-MIN(SMA(MAX(CLOSE-DELAY(CLOSE,1),0),12,1)/SMA(ABS( CLOSE-DELAY(CLOSE,1)),12,1)*100,12))
的格式化結果為:
( SMA(MAX(CLOSE-DELAY(CLOSE,1),0),12,1) / SMA(ABS(CLOSE-DELAY(CLOSE,1)),12,1) * 100-MIN( SMA(MAX(CLOSE-DELAY(CLOSE,1),0),12,1) / SMA(ABS(CLOSE-DELAY(CLOSE,1)),12,1)*100, 12 ) ) / ( MAX( SMA(MAX(CLOSE-DELAY(CLOSE,1),0),12,1) / SMA(ABS(CLOSE-DELAY(CLOSE,1)),12,1)*100, 12 ) - MIN( SMA(MAX(CLOSE-DELAY(CLOSE,1),0),12,1) / SMA(ABS(CLOSE-DELAY(CLOSE,1)),12,1)*100, 12 ) )
=IF(ROW()>COLUMN(),"",IF(ROW()=COLUMN(),$B15,ROUNDDOWN($B15*INDIRECT(SUBSTITUTE(ADDRESS(1,3+COLUMN()-ROW(), 4),1,"")&56),0)))
的格式化結果為:
=IF( ROW()>COLUMN(), "", IF( ROW()=COLUMN(), $B15, ROUNDDOWN( $B15*INDIRECT( SUBSTITUTE(ADDRESS(1,3+COLUMN()-ROW(), 4),1,"") & 56 ), 0 ) ) )
(文末有體驗網址)
不過接下來,將公佈這套格式化程序的完整代碼和開發思想,有技術能力的小夥伴可以考慮改進該代碼。
完整代碼
__author__ = 'xiaoxiaoming'

from collections import deque
import re


class Node:
    """One nesting level of a formula: text blocks mixed with child nodes."""

    def __init__(self, parent=None, tab_size=0):
        self.parent = parent  # enclosing Node, or None for the root
        self.tab_size = tab_size  # nesting depth, used to compute indentation
        self.data = []  # ordered str blocks and child Node objects

    def is_single_node(self):
        """Return True when every entry of ``data`` is a plain string."""
        return all(isinstance(e, str) for e in self.data)

    def get_single_text(self):
        """Concatenate ``data``; only meaningful when ``is_single_node()``."""
        return "".join(self.data)


def split_text_blocks(excel_func_text):
    """
    Split an Excel formula string into a list of text blocks.

    Line breaks are removed and runs of spaces are collapsed first. The text
    is then cut after every "(" and "," and around every ")". An empty pair
    "()" and anything inside double quotes are treated as ordinary characters.

    :param excel_func_text: the Excel formula string to split
    :return: list of text blocks, in source order
    """
    text = excel_func_text.replace('\n', '').replace('\r', '')
    text = re.sub(" +", " ", text)
    size = len(text)
    lines = []
    i = j = 0  # i: start of the pending block, j: scan position
    while j < size:
        c = text[j]
        # The slice (instead of text[j + 1]) keeps a trailing "(" from
        # raising IndexError.
        if c == ',' or (c == '(' and text[j + 1:j + 2] != ')'):
            lines.append(text[i:j + 1])
            i = j = j + 1
        elif c == ')' and (j == 0 or text[j - 1] != '('):
            if i < j:
                lines.append(text[i:j])
            i = j  # the pending block now starts at the ")"
            # If nothing structural sits between this ")" and the next ",",
            # emit ")...," as one block. A quote in between disables this so
            # that a comma inside a string literal never becomes a cut point.
            k = text.find(",", j + 1)
            if k != -1 and not any(ch in text[j + 1:k] for ch in '()"'):
                lines.append(text[i:k + 1])
                i = j = k + 1
            elif j + 1 < size and text[j + 1] not in ')"':
                # ")" followed by an operator character: emit both separately.
                lines.append(")")
                lines.append(text[j + 1])
                i = j = j + 2
            else:
                lines.append(")")
                i = j = j + 1
        elif c == '"':
            # Skip the whole string literal. An unterminated quote swallows
            # the rest of the text instead of resetting j to 0 (which used to
            # loop forever).
            closing = text.find('"', j + 1)
            j = size if closing == -1 else closing + 1
        else:
            j += 1
    # Keep whatever follows the last delimiter (e.g. the "1" in "=SUM(A1)+1");
    # whitespace-only leftovers are ignored.
    if text[i:].strip():
        lines.append(text[i:])
    return lines


# Number of spaces per indentation level; overwritten by excel_func_format().
blank_char_count = 2


def combine_node(root, text_max_length=60, max_combine_layer=3):
    """
    Collapse innermost nodes that contain only text into a single text block.

    The tree is modified in place. Each pass merges one layer, so a parent
    whose children were all merged can itself be merged on the next pass.

    :param root: root of the tree to process
    :param text_max_length: a node is merged only when its joined text is
        strictly shorter than this value
    :param max_combine_layer: maximum number of merge passes
    :return: None
    """
    for _ in range(max_combine_layer):
        merged_any = False
        pending = deque([root])
        while pending:
            current = pending.pop()
            replacements = {}
            for index, child in enumerate(current.data):
                if not isinstance(child, Node):
                    continue
                if not child.is_single_node():
                    pending.append(child)
                    continue
                text = child.get_single_text()
                if len(text) < text_max_length:
                    replacements[index] = text
            if replacements:
                merged_any = True
                for index, text in replacements.items():
                    current.data[index] = text
        if not merged_any:
            break


def node_next_line(node, blank_count=None):
    """
    Yield the indented output lines of ``node`` and its descendants.

    The first and last text entries of a node (normally "NAME(" and ")") are
    indented one level less than the entries in between.

    :param node: node to render
    :param blank_count: spaces per indentation level; defaults to the
        module-level ``blank_char_count``
    """
    if blank_count is None:
        blank_count = blank_char_count
    last = len(node.data) - 1
    for i, e in enumerate(node.data):
        if isinstance(e, str):
            tab = node.tab_size - 1 if i in (0, last) else node.tab_size
            # A negative tab (root level) yields an empty indent.
            yield f"{' ' * blank_count * tab}{e}"
        else:
            yield from node_next_line(e, blank_count)


def excel_func_format(excel_func_text, blank_count=2, combine_single_node=True,
                      text_max_length=60, max_combine_layer=3):
    """
    Format an Excel formula into an indented, easier-to-read layout.

    :param excel_func_text: the Excel formula string to format
    :param blank_count: number of spaces per indentation level
    :param combine_single_node: whether to merge text-only nodes; the two
        parameters below only take effect when this is True
    :param text_max_length: a node is merged only when its joined text is
        strictly shorter than this value
    :param max_combine_layer: maximum number of merge passes
    :return: the formatted string, lines joined with "\\n"
    """
    global blank_char_count
    blank_char_count = blank_count  # kept for callers that read the global
    blocks = split_text_blocks(excel_func_text)
    tab_size = 0
    node = root = Node()
    for block in blocks:
        if block.endswith("("):
            tab_size += 1
            child_node = Node(node, tab_size)
            node.data.append(child_node)
            node = child_node
            node.data.append(block)
        elif block.startswith(")"):
            node.data.append(block)
            # An unmatched ")" at root level stays in the root instead of
            # leaving node = None (which crashed on the next block).
            if node.parent is not None:
                tab_size -= 1
                node = node.parent
        else:
            node.data.append(block)
    if combine_single_node:
        combine_node(root, text_max_length, max_combine_layer)
    return "\n".join(node_next_line(root, blank_count))
處理流程淺析
下面都以如下公式作為示例:
=IF(ROW()>COLUMN(),"",IF(ROW()=COLUMN(),$B15,ROUNDDOWN($B15*INDIRECT(SUBSTITUTE(ADDRESS(1,3+COLUMN()-ROW(), 4),1,"")&56),0)))
文本分塊切分
def split_text_blocks(excel_func_text): """ 將excel公式字符串,按照一定的規則切割成數組 :param excel_func_text: 被切割的excel公式字符串 :return: 切割後的結果 """ excel_func_text = excel_func_text.replace('\n', '').replace('\r', '') excel_func_text = re.sub(" +", " ", excel_func_text) lines = [] i, j = 0, 0 while j < len(excel_func_text): c = excel_func_text[j] if (c == '(' and excel_func_text[j + 1] != ')') or c == ',': lines.append(excel_func_text[i:j + 1]) i = j = j + 1 elif c == ')' and excel_func_text[j - 1] != '(': if i < j: lines.append(excel_func_text[i:j]) i = j # 起始文件塊置於)處 # 以下代碼查找,如果中間不包含(或),則將)和,之間的文本塊加入到劃分結果 k = excel_func_text.find(",", j + 1) l = excel_func_text.find("(", j + 1, k) m = excel_func_text.find(")", j + 1, k) if k != -1 and l == -1 and m == -1: lines.append(excel_func_text[i:k + 1]) i = j = k + 1 elif j + 1 < len(excel_func_text) and excel_func_text[j + 1] != ')': lines.append(")") lines.append(excel_func_text[j + 1]) i = j = j + 2 else: lines.append(")") i = j = j + 1 elif c == '"': j = excel_func_text.find('"', j + 1) + 1 else: j += 1 return lines s = """=IF(ROW()>COLUMN(),"",IF(ROW()=COLUMN(),$B15,ROUNDDOWN($B15*INDIRECT(SUBSTITUTE(ADDRESS(1,3+COLUMN()-ROW(), 4),1,"")&56),0))) """ blocks = split_text_blocks(s) for block in blocks: print(block)
的運行結果為:
=IF( ROW()>COLUMN(), "", IF( ROW()=COLUMN(), $B15, ROUNDDOWN( $B15*INDIRECT( SUBSTITUTE( ADDRESS( 1, 3+COLUMN()-ROW(), 4 ), 1, "" ) & 56 ), 0 ) ) )
這段代碼首先替換掉所有的換行符,將多個空格替換為單個空格,然後將左右括號和逗號作為切分點進行切分。
但存在一些特殊情況,例如ROW()和COLUMN()括號內部沒有任何內容,所以這種括號應該作為普通字符處理,另外被""包含的字符串可能包含括號,也應該作為普通字符。
構建多叉樹層次結構
設計數據結構:
class Node:
    """A tree node holding the blocks that belong to one nesting level."""

    def __init__(self, parent=None, tab_size=0):
        # Ordered content of this level: text blocks and child nodes.
        self.data = []
        # Nesting depth of this node.
        self.tab_size = tab_size
        # Reference to the enclosing node (None for the root).
        self.parent = parent
parent存儲父節點的指針,tab_size存儲當前節點的層級,data存儲當前節點的所有數據。
構建代碼:
# Build the multi-way tree from the split blocks: a block ending with "("
# opens a child level, a block starting with ")" closes the current level.
tab_size = 0
node = root = Node()
for block in blocks:
    if block.endswith("("):
        tab_size += 1
        child_node = Node(node, tab_size)
        node.data.append(child_node)
        node = child_node
        node.data.append(block)
    elif block.startswith(")"):
        node.data.append(block)
        # An unmatched ")" at root level stays in the root; stepping up to a
        # missing parent would leave node = None and crash on the next block.
        if node.parent is not None:
            tab_size -= 1
            node = node.parent
    else:
        node.data.append(block)
構建完畢後,這段數據在內存中的結構(僅展示data)如下:
遍歷打印這棵多叉樹
def node_next_line(node, blank_count=2):
    """
    Yield the indented output lines of ``node`` and its descendants.

    The first and last text entries of a node are indented one level less
    than the entries in between.

    :param node: node to render
    :param blank_count: number of spaces per indentation level (default 2)
    """
    last = len(node.data) - 1
    for i, e in enumerate(node.data):
        if isinstance(e, str):
            tab = node.tab_size - 1 if i in (0, last) else node.tab_size
            # A negative tab (root level) yields an empty indent.
            yield f"{' ' * blank_count * tab}{e}"
        else:
            yield from node_next_line(e, blank_count)


# Render the tree built above and print it.
result = list(node_next_line(root))
print("\n".join(result))
結果:
=IF( ROW()>COLUMN(), "", IF( ROW()=COLUMN(), $B15, ROUNDDOWN( $B15*INDIRECT( SUBSTITUTE( ADDRESS( 1, 3+COLUMN()-ROW(), 4 ), 1, "" ) & 56 ), 0 ) ) )
合並最內層的節點
顯然將最內層的node5節點合並一下閱讀性更好:
首先給數據結構增加判斷是否為純文本節點的方法:
class Node:
    """One nesting level of a formula: text blocks mixed with child nodes."""

    def __init__(self, parent=None, tab_size=0):
        self.parent = parent  # enclosing Node, or None for the root
        self.tab_size = tab_size  # nesting depth of this node
        self.data = []  # ordered str blocks and child Node objects

    def is_single_node(self):
        """Return True when every entry of ``data`` is a plain string."""
        return all(isinstance(e, str) for e in self.data)

    def get_single_text(self):
        """Concatenate ``data``; only meaningful when ``is_single_node()``."""
        return "".join(self.data)
下面是合並純文本節點的實現,max_combine_layer決定了合並的最大次數,如果合並後長度達到或超過text_max_length參數,則不應用這次合並:
from collections import deque


def combine_node(root, text_max_length=60, max_combine_layer=3):
    """
    Collapse innermost nodes that contain only text into a single text block.

    The tree is modified in place, one layer per pass.

    :param root: root of the tree to process
    :param text_max_length: a node is merged only when its joined text is
        strictly shorter than this value
    :param max_combine_layer: maximum number of merge passes
    :return: None
    """
    for _ in range(max_combine_layer):
        merged_any = False
        pending = deque([root])
        while pending:
            current = pending.pop()
            replacements = {}
            for index, child in enumerate(current.data):
                if not isinstance(child, Node):
                    continue
                if not child.is_single_node():
                    # Still has nested nodes: visit it later in this pass.
                    pending.append(child)
                    continue
                text = child.get_single_text()
                if len(text) < text_max_length:
                    replacements[index] = text
            if replacements:
                merged_any = True
                # Applied after the scan so data is not changed mid-iteration.
                for index, text in replacements.items():
                    current.data[index] = text
        if not merged_any:
            # Nothing was merged in this pass, so further passes are no-ops.
            break
合並一次:
# Run a single merge pass with a 100-character limit, then print the tree.
combine_node(root, 100, 1)
result = list(node_next_line(root))
print("\n".join(result))
結果:
=IF( ROW()>COLUMN(), "", IF( ROW()=COLUMN(), $B15, ROUNDDOWN( $B15*INDIRECT( SUBSTITUTE( ADDRESS(1,3+COLUMN()-ROW(), 4), 1, "" ) & 56 ), 0 ) ) )
合並二次:
# Run up to two merge passes with a 100-character limit, then print the tree.
combine_node(root, 100, 2)
result = list(node_next_line(root))
print("\n".join(result))
結果:
=IF( ROW()>COLUMN(), "", IF( ROW()=COLUMN(), $B15, ROUNDDOWN( $B15*INDIRECT( SUBSTITUTE(ADDRESS(1,3+COLUMN()-ROW(), 4),1,"") & 56 ), 0 ) ) )
合並三次:
# Run up to three merge passes with a 100-character limit, then print the tree.
combine_node(root, 100, 3)
result = list(node_next_line(root))
print("\n".join(result))
結果:
=IF( ROW()>COLUMN(), "", IF( ROW()=COLUMN(), $B15, ROUNDDOWN( $B15*INDIRECT(SUBSTITUTE(ADDRESS(1,3+COLUMN()-ROW(), 4),1,"")&56), 0 ) ) )
合並三次後的內存情況:
體驗網址
http://xiaoxiaoming.xyz:8088/excel
不保證永久有效。
到此這篇關於python實現excel公式格式化的示例代碼的文章就介紹到這了,更多相關python excel公式格式化內容請搜索WalkonNet以前的文章或繼續瀏覽下面的相關文章,希望大家以後多多支持WalkonNet!
推薦閱讀:
- python 使用openpyxl讀取excel數據
- python處理excel文件之xlsxwriter 模塊
- python 字典生成樹狀圖的實例
- 如何用python合並多個excel文件
- 基於Python實現文本文件轉Excel