Lingoes(霊格斯)のLDX、LD2等のフォーマット分析
24473 ワード
辞書の形式はこうです
struct {
    int Flag;            // ?LD2、?LDXなどのタグ
    byte[16] MD5;        // MD5値と推定される。LD2をLDXに変換すると、先頭の2つのフィールドのみが変化する
    byte[4] byteorder;
    short majorversion;
    short minorversion;
    byte[16] ID;
    int version;
    int flags;
    byte[16] MD52;
    int unknown1;
    int unknown2;
    int unknown3;
    int unknown4;
} Header;                // 以上が辞書ヘッダ
int unknown;             // 常に1
int xmllength;           // 辞書情報のxmlの長さ
[Encrypted("DES")]
byte[xmllength] infoxml; // 辞書情報を保存するxml。DESで暗号化されている
struct {
    int dicttype;        // 取りうる値は0,1,2,3,5
    [Switch(dicttype)]
    union {
        [Case(3)]        // dicttype=3はローカル辞書
        {
            int dictlength;
            [Length(dictlength)]
            {
                int indexLength;                // インデックステーブルの長さ
                int uncompressedwordoffset;     // 解凍後のファイルにおける単語テーブルの開始位置
                int uncompressedxmloffset;      // 解凍後のファイルにおけるxmlの開始位置
                int uncompressedxmldatalength;  // 解凍後のxmlデータの長さ
                int compresseddatalength;
                [Length(indexLength)]
                int[] indexes;                  // インデックステーブル
                [Length(compresseddatalength)]
                struct {
                    int compressedblockindexlength; // 解凍後の各ブロックのデータサイズ
                    int uncompressedTotalLength;    // 解凍後の総サイズ
                    int[] compressedindexes;        // 各ブロックのcompresseddata内での開始位置。次の要素が終了位置になる
                    byte[] compresseddata;
                } compresseddata;
            }
        }
        [Case(0)]        // dicttype=0は辞書の終わりを表す
    } Dictcontent;
} Dict[];
解凍後のデータ構造:byte[]wordindexdata;byte[] worddata;byte[] xmldata;
各単語のインデックス値に10を乗じたものがwordindexdata内でのオフセットであり、その位置から以下のデータ構造を読み出す:
struct {
    int WordOffset;      // worddataでの単語のオフセット
    int XmlOffset;       // xmldataでの単語のオフセット
    byte Flag1;
    byte Flag2;          // 1であれば、worddataでのデータがインデックス値であることを示す。整数を読み出し、そのインデックスでwordとxmlを再計算する
    int NextWordOffset;  // wordの長さの計算に用いる
    int NextXmlOffset;   // xmlの長さの計算に用いる
} WordInfo;
読み出したwordは単語、xmlは単語の解釈である。xmlは変形したHTML形式であり、XSLTで標準のHTMLに変換してから読む必要がある。
復号後の辞書情報の内容は次のような形式であり、アイコンはbase64で符号化されていると思われる。
解凍後の辞書エントリのxmlは、次のとおりです.
転載元:https://www.cnblogs.com/SuperBrothers/archive/2012/11/24/2785971.html
struct {
    int Flag;            // ?LD2、?LDXなどのタグ
    byte[16] MD5;        // MD5値と推定される。LD2をLDXに変換すると、先頭の2つのフィールドのみが変化する
    byte[4] byteorder;
    short majorversion;
    short minorversion;
    byte[16] ID;
    int version;
    int flags;
    byte[16] MD52;
    int unknown1;
    int unknown2;
    int unknown3;
    int unknown4;
} Header;                // 以上が辞書ヘッダ
int unknown;             // 常に1
int xmllength;           // 辞書情報のxmlの長さ
[Encrypted("DES")]
byte[xmllength] infoxml; // 辞書情報を保存するxml。DESで暗号化されている
struct {
    int dicttype;        // 取りうる値は0,1,2,3,5
    [Switch(dicttype)]
    union {
        [Case(3)]        // dicttype=3はローカル辞書
        {
            int dictlength;
            [Length(dictlength)]
            {
                int indexLength;                // インデックステーブルの長さ
                int uncompressedwordoffset;     // 解凍後のファイルにおける単語テーブルの開始位置
                int uncompressedxmloffset;      // 解凍後のファイルにおけるxmlの開始位置
                int uncompressedxmldatalength;  // 解凍後のxmlデータの長さ
                int compresseddatalength;
                [Length(indexLength)]
                int[] indexes;                  // インデックステーブル
                [Length(compresseddatalength)]
                struct {
                    int compressedblockindexlength; // 解凍後の各ブロックのデータサイズ
                    int uncompressedTotalLength;    // 解凍後の総サイズ
                    int[] compressedindexes;        // 各ブロックのcompresseddata内での開始位置。次の要素が終了位置になる
                    byte[] compresseddata;
                } compresseddata;
            }
        }
        [Case(0)]        // dicttype=0は辞書の終わりを表す
    } Dictcontent;
} Dict[];
解凍後のデータ構造:byte[]wordindexdata;byte[] worddata;byte[] xmldata;
各単語のインデックス値に10を乗じたものがwordindexdata内でのオフセットであり、その位置から以下のデータ構造を読み出す:
struct {
    int WordOffset;      // worddataでの単語のオフセット
    int XmlOffset;       // xmldataでの単語のオフセット
    byte Flag1;
    byte Flag2;          // 1であれば、worddataでのデータがインデックス値であることを示す。整数を読み出し、そのインデックスでwordとxmlを再計算する
    int NextWordOffset;  // wordの長さの計算に用いる
    int NextXmlOffset;   // xmlの長さの計算に用いる
} WordInfo;
読み出したwordは単語、xmlは単語の解釈である。xmlは変形したHTML形式であり、XSLTで標準のHTMLに変換してから読む必要がある。
復号後の辞書情報の内容は次のような形式であり、アイコンはbase64で符号化されていると思われる。
<dict id="3699E846E5BC094CA733B92FD733ACDF" version="2" byte_order="4321" type="1" name="Collins COBUILD
Advanced Learner's English Dictionary"
icon="89504E470D0A1A0A0000000D4948445200000010000000100803000000282D0F530000000467414D410000AFC837058AE90000001
974455874536F6674776172650041646F626520496D616765526561647971C9653C00000114504C5445F7F7FAFCFCFD7F7FB1F9F9FBFAFA
FCF4F4F86F6FA6F6F6F9C2C2D9000064A5A5C85E5E9DC3C3DA494991C5C5DBA1A1C50404671C1C765A5A9A111170AAAACBCACADECFCFE1F
3F3F87070A8F5F5F9EFEFF57373A91818744B4B91EDEDF48080B2F8F8FA050569DFDFEB58589A9191BC6C6CA6BFBFD8F2F2F7C0C0D7BFBF
D79999BFEAEAF2D3D3E4D1D1E20101663F3F8B212178141471D1D1E3A0A0C51616737979AE020267D2D2E37070A77E7EB147478E3E3E8A9
494BD4E4E92B6B6D29E9EC40A0A6BB0B0CFD8D8E7FDFDFDDBDBE9F6F6FA4D4D94C9C9DDE1E1EC61619FBEBED7E3E3EE9797BFC1C1D98484
B421217901016746468E6E6EA66C6CA5F0F0F5C9C9DE232379F0F0F6ECECF3000065000066FFFFFF5AB8BD910000005C74524E53FFFFFFF
FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00836D17E0000000E94944415478DA628806817037431E3
B4F3910132080188058495B2392934D224A8C49373A1A208080025C020E3ECA325E0C2EAEDC7C8CD10001C410CD681D1969C5C200D61AC2
C40C104040065364A4693414B0B303041043B4B9962A3F2B4C209A03208018B8022205DDE1FC6867800062088B8A8AB4450844030410830
15040164900208018ECA3224D789004000288215A54C54284016128400001997E919196303E332B4000011DA61F1919C8C202E6ABCB2B00
04105085B19EA20DB7BF2FB38778B0916434400081743B796B023D276D161AA4131D0D104010E3A47885D51C8522404C800003000286447
A9A58897A0000000049454E44AE426082" date="2007-11-21" update="2007-11-21"><from lang="en" charset="utf-8"
sort="0" /><to lang="en" charset="utf-8" /><flag>000000B1</flag><info><item lang="en"><title>Collins COBUILD
Advanced Learner's English Dictionary</title><edition>4</edition><description
/><publisher><author>HarperCollins Publishers Ltd</author><email
/><website>http://www.collins.co.uk/</website><copyright>Copyright © HarperCollins Publishers Ltd
2004.</copyright></publisher><message /><license /></item><item lang="zh-CN"><title>
</title><edition>4</edition><description /><publisher><author>HarperCollins Publishers Ltd</author><email
/><website>http://www.collins.co.uk/</website><copyright>Copyright © HarperCollins Publishers Ltd
2004.</copyright></publisher><message /><license /></item><item lang="zh-TW"><title>
</title><edition>4</edition><description /><publisher><author>HarperCollins Publishers Ltd</author><email
/><website>http://www.collins.co.uk/</website><copyright>Copyright © HarperCollins Publishers Ltd
2004.</copyright></publisher><message /><license /></item></info><str /><res /><gls count="32730"
index_count="32730" maxsize_key="45" maxsize_data="26986" /></dict>
解凍後の辞書エントリのxmlは、次のとおりです.
<C><F><H><L><l>1</l>word</L> <h><U>n.</U></h></H><K>Pronunciation: 'wərd Function: noun
Etymology: Middle
English, from Old English; akin to Old High German wort word, Latin verbum, Greek
eirein to say, speak, Hittite weriya- to call, name Date:
before 12th century
1 a : something that is said b
plural (1) : TALK, DISCOURSE <putting one's feelings into
words> (2) : the text of a vocal musical composition c : a brief
remark or conversation <would like to have a word with you>
2 a (1) : a speech
sound or series of speech sounds that symbolizes and communicates a meaning usually without being divisible
into smaller units capable of independent use (2) : the entire set of linguistic forms produced
by combining a single base with various inflectional elements without change in the part of speech elements
b (1) : a written or printed character or combination of characters representing a spoken word
<the number of words to a line> ― sometimes used with the first letter of a real or
pretended taboo word prefixed as an often humorous euphemism <the first man to utter the f word on
British TV ― Time> <we were not afraid to use the d word and talk about death ― Erma
Bombeck> (2) : any segment of written or printed discourse ordinarily appearing between spaces
or between a space and a punctuation mark c : a number of bytes processed as a unit and conveying
a quantum of information in communication and computer work
3 : ORDER, COMMAND <don't move
till I give the word>
4 often capitalized a : LOGOS b : GOSPEL 1A
c : the expressed or manifested mind and will of God
5 a : NEWS, INFORMATION <sent word that he would be late> b :
RUMOR
6 : the act of speaking or of
making verbal communication
7 : SAYING, PROVERB
8 : PROMISE, DECLARATION
<kept her word>
9 : a quarrelsome utterance or conversation ― usually used in
plural <they had words and parted>
10 : a verbal signal : PASSWORD
11 slang ― used interjectionally to
express agreement
–good word
1 : a favorable statement <put in a good
word for me>
2 : good news <what's the good word>
–in a
word : in short
–in so many words
1 : in exactly those terms
<implied that such actions were criminal but did not say so in so many words>
2 :
in plain forthright language <in so many words, she wasn't fit to be seen ― Jean Stafford>
–of few words : not inclined to say more than is necessary : LACONIC <a man of few words>
–of
one's word : that can be relied on to keep a promise ― used only after man or woman
<a man of his word>
–upon my word : with my assurance : INDEED, ASSUREDLY <upon my word, I've never heard of such a thing>]]
>2 word n. Function: intransitive
verb Date:
13th century
archaic : SPEAK
transitive verb : to express in words : PHRASE <a carefully worded reply> ]]></K></F><F><H><L><x
K="#333399">word <h>(as used in expressions)</h></x></L> <h><U>n.</U></h></H><K>code word
content
word
dirty word
entry word
fighting
word
form word
four letter word
function word
ghost word
guide word
key
word
last word
my word
weasel
word
upon my word
of one's word
in a
word
good word
word association test
word class
word for word
word hoard
word mongering
word of
mouth
word order
word processing
word
process
word processor
word square
word stress
word accent
word wrap
eat one's words
of few words
in so many words
]]></K></F></C>
転載元:https://www.cnblogs.com/SuperBrothers/archive/2012/11/24/2785971.html