python常用脚本

excel转metadata

pip install openpyxl
python excel2metadata.py report.xlsx report.json
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Excel 元数据提取工具(终极兼容版)
支持:公式、数组公式、下拉列表、批注、超链接
修复:id 参数、MultiCellRange、ArrayFormula 等 openpyxl 兼容问题
"""

# ===== 补丁 1:修复 DataValidation 的 id/xr:uid 问题 =====
try:
    from openpyxl.worksheet.datavalidation import DataValidation
    _orig_init = DataValidation.__init__
    def _patched_init(self, *args, **kwargs):
        kwargs.pop('id', None)
        kwargs.pop('xr:uid', None)
        return _orig_init(self, *args, **kwargs)
    DataValidation.__init__ = _patched_init
except Exception:
    pass
# =========================================================

import sys
import json
import argparse
from openpyxl import load_workbook


def extract_excel_metadata(file_path):
    try:
        wb = load_workbook(filename=file_path, data_only=False)
    except Exception as e:
        raise RuntimeError(f"无法打开 Excel 文件 '{file_path}':{e}")

    output = {"sheets": {}}

    for sheet_name in wb.sheetnames:
        ws = wb[sheet_name]
        sheet_data = {
            "cells": {},
            "data_validations": {}
        }

        for row in ws.iter_rows():
            for cell in row:
                if (cell.value is None 
                    and cell.data_type != 'f' 
                    and cell.comment is None 
                    and cell.hyperlink is None):
                    continue

                coord = cell.coordinate
                info = {}

                # === 修复点:安全处理 ArrayFormula ===
                if cell.data_type == 'f':
                    # 普通公式是 str,数组公式是 ArrayFormula 对象
                    if hasattr(cell.value, 'text'):
                        info["formula"] = cell.value.text
                    else:
                        info["formula"] = str(cell.value)
                else:
                    info["value"] = str(cell.value) if cell.value is not None else ""
                # =====================================

                if cell.comment:
                    info["comment"] = cell.comment.text

                if cell.hyperlink:
                    info["hyperlink"] = cell.hyperlink.target

                if info:
                    sheet_data["cells"][coord] = info

        # 提取数据验证
        for dv in ws.data_validations.dataValidation:
            sqref_str = str(dv.sqref) if dv.sqref else ""
            ranges = sqref_str.split()
            for rng in ranges:
                dv_info = {
                    "type": dv.type,
                    "operator": getattr(dv, 'operator', None),
                    "allow_blank": bool(getattr(dv, 'allowBlank', False)),
                    "show_input_message": bool(getattr(dv, 'showInputMessage', False)),
                    "show_error_message": bool(getattr(dv, 'showErrorMessage', False)),
                }
                if dv.formula1 is not None:
                    dv_info["formula1"] = dv.formula1
                    if dv.type == "list":
                        f1 = dv.formula1
                        if f1 and not f1.startswith("="):
                            clean_f1 = f1.strip('"')
                            if ',' in clean_f1:
                                dv_info["options"] = [opt.strip() for opt in clean_f1.split(',')]
                            else:
                                dv_info["options"] = [clean_f1]
                        else:
                            dv_info["source_range"] = f1
                if dv.formula2 is not None:
                    dv_info["formula2"] = dv.formula2

                sheet_data["data_validations"][rng] = dv_info

        output["sheets"][sheet_name] = sheet_data

    return output


def main():
    parser = argparse.ArgumentParser(description="Excel 元数据导出工具(含公式、下拉、批注)")
    parser.add_argument("input", help="输入的 .xlsx 文件路径")
    parser.add_argument("output", nargs="?", default=None, help="输出 JSON 路径(可选)")
    args = parser.parse_args()

    try:
        data = extract_excel_metadata(args.input)
        json_str = json.dumps(data, indent=4, ensure_ascii=False)
        if args.output:
            with open(args.output, "w", encoding="utf-8") as f:
                f.write(json_str)
            print(f"✅ 已保存到: {args.output}")
        else:
            print(json_str)
    except Exception as e:
        print(f"❌ 错误: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
目录
冀ICP备2021025979号-1