PDFlib 中国語出力(四):PDFlib が受け付けるテキスト入力形式について

7642 ワード

WINDOWS

PDFlib の textformat パラメータは、テキスト入力形式を次のように設定します。
bytes:文字列内の各バイトが1文字に対応します。主に8ビットエンコーディングで使用されます。
utf8:文字列は UTF-8 でエンコードされています。
ebcdicutf8:文字列は EBCDIC 版の UTF-8 でエンコードされており、IBM iSeries および zSeries でのみ使用されます。
utf16:文字列は UTF-16 でエンコードされています。文字列が Unicode のバイト順マーク(BOM)で始まる場合、PDFlib は BOM の情報を読み取ったうえで、文字列の先頭から BOM を取り除きます。文字列に BOM がない場合、文字列のバイト順はホストのバイト順に従います。Intel x86 システムはリトルエンディアン(little-endian、0xFFFE)であり、Sparc および PowerPC システムはビッグエンディアン(big-endian、0xFEFF)です。
utf16be:文字列はビッグエンディアンのバイト順の UTF-16 でエンコードされています。BOM に対する特別な処理は行われません。
utf16le:文字列はリトルエンディアンのバイト順の UTF-16 でエンコードされています。BOM に対する特別な処理は行われません。
auto:8ビットエンコーディングでは「bytes」に相当し、ワイド文字エンコーディング(unicode、glyphid、UCS2 または UTF16 の CMap)では「utf16」に相当します。
プログラミング言語のうち、Unicode 文字列を自動的に処理できるものを Unicode 対応(Unicode-capable)言語と呼び、COM、.NET、Java、REALbasic、Tcl などがこれに当たります。Unicode 文字列に特別な処理が必要なものを非 Unicode 対応(non-Unicode-capable)言語と呼び、C、C++、Cobol、Perl、PHP、Python、RPG などがこれに当たります。non-Unicode-capable 言語では、「auto」設定でほとんどのテキスト文字列が正しく処理されます。Unicode-capable 言語の場合、textformat パラメータのデフォルト値は「utf16」です。non-Unicode-capable 言語のデフォルト値は「auto」です。このほか、PDFlib は SGML や HTML でよく使われる文字参照(Character Reference)もサポートしています。これを使うには、パラメータ charref を true に設定し、textformat を「bytes」に設定する必要があります。

PDF_set_parameter(p, "charref", "true");

PDF_set_parameter(p, "textformat", "bytes");

有効なCharacter Referenceをいくつか示します.
&#173; soft hyphen
&#xAD; soft hyphen
&shy; soft hyphen
&#x20AC; Euro glyph (hexadecimal)
&#8364; Euro glyph (decimal)
&euro; Euro glyph (entity name)
&lt; less than sign
&gt; greater than sign
&amp; ampersand sign
&Alpha; Greek Alpha
次の例は C のソースプログラムです(生成された PDF ファイル pdflib_cs4.pdf を添付)。

/*******************************************************************/

/* This example demonstrates output of Simplified Chinese text with different */

/* textformat options under Simplified Chinese Windows. */

/*******************************************************************/

#include 

#include 

#include 

#include "pdflib.h"



int main(void)

{

    PDF             *p = NULL;

    int                 Font_E = 0, Font_H = 0, Font_CS = 0, Left = 50, y = 800, i = 0;

    const int       INCRY = 25;

    char              text[128], buf[128];



    /* 1 byte text (English: "Simplified Chinese") */

    static const char byte_text[] =

        "\123\151\155\160\154\151\146\151\145\144\040\103\150\151\156\145\163\145";

    static const int byte_len = 18;

    static const char byte2_text[] = {0x53,0x69,0x6D,0x70,0x6C,0x69,0x66,0x69,0x65,

                                                0x64,0x20,0x43,0x68,0x69,0x6E,0x65,0x73,0x65};

    static const int byte2_len = 18;

    /* 2 byte text (Simplified Chinese) */

    static const unsigned short utf16_text[] = {0x7B80,0x4F53,0x4E2D,0x6587};

    static const int utf16_len = 8;

    static const unsigned char utf16be_text[] ="\173\200\117\123\116\055\145\207";

    static const int utf16be_len = 8;

    static const unsigned char utf16be_bom_text[] = "\376\377\173\200\117\123\116\055\145\207";

    static const int utf16be_bom_len = 10;

    static const unsigned char utf16le_text[] ="\200\173\123\117\055\116\207\145";

    static const int utf16le_len = 8;

    static const unsigned char utf16le_bom_text[] = "\377\376\200\173\123\117\055\116\207\145";

    static const int utf16le_bom_len = 10;

    static const unsigned char utf8_text[] = "\347\256\200\344\275\223\344\270\255\346\226\207";

    static const int utf8_len = 12;

    static const unsigned char utf8_bom_text[] = "\xEF\xBB\xBF\xE7\xAE\x80\xE4\xBD\x93\xE4\xB8\xAD\xE6\x96\x87";

    static const int utf8_bom_len = 15;

    static const char htmlutf16_text[] = " ";

    static const int htmlutf16_len = sizeof(htmlutf16_text) - 1;



    typedef struct

    {

        char *textformat;

        char *encname;

        const char *textstring;

        const int  *textlength;

        const char *bomkind;

    } TestCase;



static const TestCase table_8[] = {

 { "bytes",      "winansi",  (const char *)byte_text,         &byte_len,      ""},

     { "auto",        "winansi",  (const char *)byte_text,         &byte_len,      ""},

     { "bytes",      "winansi",  (const char *)byte2_text,       &byte2_len,     ""}, };



    static const TestCase table_16[] =  {

{ "auto",  "unicode",  (const char *)utf16_text,       &utf16_len,      ""},

{ "utf16", "unicode",  (const char *)utf16_text,       &utf16_len,      ""},

{ "auto",  "unicode",  (const char *)utf16be_bom_text, &utf16be_bom_len, ", UTF-16+BE-BOM"},

{ "auto",     "unicode",     (const char *)utf16le_bom_text, &utf16le_bom_len, ", UTF-16+LE-BOM"},

{ "utf16be", "unicode",    (const char *)utf16be_text,         &utf16be_len,    ""},

{ "utf16le",   "unicode",   (const char *)utf16le_text,           &utf16le_len,    ""},

{ "utf8",       "unicode",    (const char *)utf8_text,               &utf8_len,       ""},

{ "auto",       "unicode",   (const char *)utf8_bom_text,      &utf8_bom_len, ", UTF-8+BOM"},

{ "bytes", "unicode",   (const char *)htmlutf16_text, &htmlutf16_len, ", HTML unicode character"}, };



    const int   tsize_8 = sizeof table_8 / sizeof (TestCase);

    const int   tsize_16 = sizeof table_16 / sizeof (TestCase);



    /* create a new PDFlib object */

    if ((p = PDF_new()) == (PDF *) 0)

    {

        printf("Couldnt create PDFlib object (out of memory)!
");

        return(2);

    }



    PDF_TRY(p) {

	if (PDF_begin_document(p, "pdflib_cs4.pdf", 0, "") == -1) 

            {

	    printf("Error: %s
", PDF_get_errmsg(p));

	    return(2);

	}



	PDF_set_info(p, "Creator", "pdflib_cs4.c");

	PDF_set_info(p, "Author", "[email protected]");

	PDF_set_info(p, "Title", "Output Chinese Simplify with Different textformat");



        /* Start a new page. */

        PDF_begin_page_ext(p, a4_width, a4_height, "");

        Font_H = PDF_load_font(p, "Helvetica-Bold", 0, "winansi", "");



        /* 8-bit encoding */

        Font_E = PDF_load_font(p, "Times", 0, "winansi", "");

        PDF_setfont(p, Font_H, 24);

        PDF_show_xy(p, "8-bit encoding", Left+40,  y);

        y -= 2*INCRY;



        for (i = 0; i < tsize_8; ++i)

        {

            PDF_setfont(p, Font_H, 14);

            sprintf(text, "%s encoding, %s textformat %s: ", table_8[i].encname, 

                table_8[i].textformat, table_8[i].bomkind);

            PDF_show_xy(p, text, Left,  y);

            y -= INCRY;

            PDF_set_parameter(p, "textformat", table_8[i].textformat);

            PDF_setfont(p, Font_E, 14);

            PDF_show_xy(p, table_8[i].textstring, Left,  y);

            y -= INCRY;

        } /* for */



        /* 16-bit encoding */

        PDF_setfont(p, Font_H, 24);

        y -= 2*INCRY;

        PDF_show_xy(p, "16-bit encoding", Left+40,  y);

        y -= 2*INCRY;

        PDF_set_parameter(p, "charref", "true");

        Font_CS = PDF_load_font(p, "STSong-Light", 0, "UniGB-UCS2-H", "");

        for (i = 0; i < tsize_16; i++)

        {

            PDF_setfont(p, Font_H, 14);

[TIL] JS: ES6

2021/1/11第11期ワークフロー-🌊