らんだむな記憶

blogというものを体験してみようか!的なー

文字のCIDを調べたい

perl $FDK/Tools/SharedData/FDKScripts/cmap-tool.pl -e < $FDK/Tools/SharedData/Adobe\ Cmaps/Adobe-Japan1/UniJIS2004-UTF32-H > UniJIS2004-UTF32-H.expanded

して
[chr2cid.py]

#! /usr/bin/env python
# -*- coding: utf-8 -*-

import os, sys, re

def read_cmap(cmap_f):
    """Load an expanded CMap listing into a code -> CID dictionary.

    Only lines that start (after optional whitespace) with ``<HEX>``
    followed by whitespace and a decimal number are used; every other
    line is ignored.

    Args:
        cmap_f: Path of the text file to read.

    Returns:
        A dict mapping the integer value of each hex code to its integer
        CID.  When a code occurs more than once, the last entry wins.
    """
    # Hex digits carry no case information, so accept both "<304c>" and
    # "<304C>" instead of silently skipping upper-case entries.
    entry = re.compile(r"^\s*<([0-9a-fA-F]+)>\s+(\d+)")
    cmap = {}
    with open(cmap_f) as f:
        # Iterate the file object directly so the whole file is never
        # held in memory at once.
        for line in f:
            m = entry.match(line)
            if m:
                cmap[int(m.group(1), 16)] = int(m.group(2))
    return cmap

def _codepoint(text):
    """Return the code point of a single character held in *text*.

    A character outside the BMP is stored as a two-unit surrogate pair on
    narrow Python 2 builds; in that case the pair is combined by hand
    because ``ord`` only accepts a string of length one.
    """
    if len(text) == 2:
        hi, lo = ord(text[0]), ord(text[1])
        if 0xD800 <= hi <= 0xDBFF and 0xDC00 <= lo <= 0xDFFF:
            return 0x10000 + ((hi - 0xD800) << 10) + (lo - 0xDC00)
    # Any other length is left to ord(), which raises TypeError.
    return ord(text)


def main():
    """Print the CID that a CMap listing assigns to a character.

    Command line: ``chr2cid.py CHARACTER CMAP_FILE``.  ``CHARACTER`` is
    the character to look up and ``CMAP_FILE`` is a file readable by
    ``read_cmap``.

    Raises:
        SystemExit: If an argument is missing or the character has no
            entry in the CMap file.
        TypeError: If ``CHARACTER`` is not exactly one character.
    """
    if len(sys.argv) < 3:
        sys.exit("usage: %s CHARACTER CMAP_FILE" % sys.argv[0])
    c = sys.argv[1]
    cmap_path = sys.argv[2]
    cmap = read_cmap(cmap_path)
    # Python 2 hands over argv as bytes, Python 3 as already-decoded text.
    if isinstance(c, bytes):
        c = c.decode("utf-8")
    uni = _codepoint(c)
    if uni not in cmap:
        sys.exit("no CID found for U+%04X in %s" % (uni, cmap_path))
    # A parenthesised single argument prints identically on Python 2 and 3.
    print(cmap[uni])
# Run the lookup only when this file is executed as a script.
if __name__ == "__main__":
    main()

とか書いて、

python -B chr2cid.py あ UniJIS2004-UTF32-H.expanded

とかで分かる。