end0tknr's kipple - web写経開発

太宰府天満宮の狛犬って、妙にカワイイ

LibreDWGのdwgreadコマンド + python で AutoCAD DWGファイルからのtext抽出

https://end0tknr.hateblo.jp/entry/20230218/1676694016

先日の上記entryの続きです。

前回は、dwggrepコマンドを使用しましたが、今回は、dwgreadコマンドを使用します。

このコマンドでdwgをdxfやjson等の形式で出力できますので、今回は、dwg→json化した結果をpythonで加工し、textを抽出します。

$ dwgread --help

Usage: dwgread [OPTION]... DWGFILE
Reads the DWG into some optional output format to stdout or some file,
and prints error, success or verbose internal progress to stderr.

  -v[0-9], --verbose [0-9]  verbosity
  -O fmt,  --format fmt     fmt: DXF, DXFB, JSON, GeoJSON
           Planned output formats:  YAML, XML/OGR, GPX, SVG, PS
  -o outfile                also defines the output fmt. Default: stdout
           --help           display this help and exit
           --version        output version information and exit

GNU LibreDWG online manual: <https://www.gnu.org/software/libredwg/>

で、作成したpythonスクリプトは以下の通りです。

#!/usr/bin/python
# -*- coding: utf-8 -*-
import json
import re
import subprocess
import sys

# Absolute path of the LibreDWG `dwgread` executable.
DWG_READ_CMD = "/usr/local/bin/dwgread"
# Command-line options for dwgread: request JSON output.
DWG_READ_OPT = "--format JSON"
# Encoding used to decode the bytes dwgread writes to stdout.
DWG_ENCODE   = "CP932"

# Possibly a side effect of Shift_JIS: the output contains odd escapes
# such as "\上". This pattern is used to sanitize them: it matches a
# backslash followed by one character that is not a letter a-z
# (case-insensitively).
RE_ESCAPE = re.compile( "(\\\\)[^a-z]",re.IGNORECASE)

def main():
    """Print every text string found in the DWG file named on the command line.

    The DWG path is taken from the first command-line argument. The
    extracted strings are written to stdout concatenated without a separator.

    Raises:
        SystemExit: when no DWG path was supplied (usage message on stderr,
            non-zero exit status).
    """
    if len(sys.argv) < 2:
        # Without this guard a missing argument surfaces as a bare IndexError.
        sys.exit("usage: {} DWGFILE".format(sys.argv[0]))

    dwg_file_path = sys.argv[1]
    dwg_txts = extract_txts(dwg_file_path)
    print("".join(dwg_txts))

def extract_txts(dwg_file_path):
    """Collect the non-empty text strings stored in a DWG file.

    Each entry of the "OBJECTS" list in the parsed dwgread JSON is checked
    for the keys "text_value" and "text"; every value found is stripped of
    surrounding whitespace and kept when it is not empty.

    Args:
        dwg_file_path: path of the DWG file to read.

    Returns:
        A list of str in the order the objects appear. The list is empty
        when the file could not be read or contains no "OBJECTS" entry.
    """
    dwg_objs = read_dwg_file(dwg_file_path)
    if not dwg_objs:
        # read_dwg_file() returns None when dwgread produced no output;
        # indexing None would raise TypeError.
        return []

    txts = []
    for obj in dwg_objs.get("OBJECTS", []):
        for text_key in ("text_value", "text"):
            if text_key not in obj:
                continue

            dwg_txt = obj[text_key].strip()
            if text_key == "text":
                # Values under "text" may be wrapped in braces
                # (presumably MTEXT formatting groups -- TODO confirm).
                dwg_txt = dwg_txt.lstrip("{")
                dwg_txt = dwg_txt.rstrip("}")

            if not dwg_txt:
                continue

            txts.append(dwg_txt)
    return txts

def read_dwg_file(dwg_file_path):
    """Convert a DWG file to JSON with dwgread and return the parsed result.

    Args:
        dwg_file_path: path of the DWG file to read.

    Returns:
        The object decoded from dwgread's JSON output, or None when the
        command produced no output (which includes the case where it
        exited with a non-zero status).

    Raises:
        UnicodeDecodeError: if the output is not valid DWG_ENCODE text.
        json.JSONDecodeError: if the sanitized output is not valid JSON.
    """
    # Local import: shlex is not among this file's module-level imports.
    import shlex

    # exec_subprocess() runs the command through a shell, so the path must
    # be quoted; otherwise spaces or shell metacharacters in it would break
    # (or hijack) the command line.
    cmd = " ".join([DWG_READ_CMD, DWG_READ_OPT, shlex.quote(dwg_file_path)])

    stdout, _stderr, _return_code = exec_subprocess(cmd)
    if not stdout:
        return None

    sanitized_lines = []
    # Sanitize line by line so a pattern can never match across a newline.
    for stdout_line in stdout.decode(DWG_ENCODE).split("\n"):
        # "\P" is apparently a line break in AutoCAD text; it appears
        # JSON-escaped as "\\P" in the output.
        stdout_line = stdout_line.replace("\\\\P", " ")
        # Remove the remaining backslash escapes matched by RE_ESCAPE.
        stdout_line = RE_ESCAPE.sub("", stdout_line)
        sanitized_lines.append(stdout_line)

    return json.loads("\n".join(sanitized_lines))

# cf. https://qiita.com/fetaro/items/a3b3bd4ea197b600ac45
def exec_subprocess(cmd: str, raise_error=True):
    """Run *cmd* through the shell and capture what it prints.

    Args:
        cmd: command line, interpreted by the shell.
        raise_error: despite the name, nothing is raised. When true and the
            command exits with a non-zero status, the captured stderr is
            reported on this process's stderr and (None, None, None) is
            returned instead of the real results.

    Returns:
        A tuple (stdout, stderr, returncode); stdout and stderr are bytes.
    """
    finished = subprocess.run(cmd,
                              shell=True,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE)
    failed = finished.returncode != 0
    if failed and raise_error:
        print("ERROR", finished.stderr, file=sys.stderr)
        return (None, None, None)

    return finished.stdout, finished.stderr, finished.returncode

# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()