GolangJson Marshal UN-Escape Unicode Characters/goのMarshalのエスケープとフォーマット出力を解決する

73866 ワード

GoLang: Escape characters for JSON format
  • 前言
  • go-jsonのアンエスケープ(1)
  • go-jsonのアンエスケープ(2)
  • 参考リンク
  • 前言: GoLangで構造体(struct)をMarshalすると、HTMLで特別な意味を持つ <、>、& などの文字がUnicodeエスケープ形式(\u003c、\u003e、\u0026)で出力される。以下の1行目がMarshal後の出力、2行目が元の形式である。元の形式のJSONが必要な場合があるが、GolangにはPythonのdecodeに相当する機能がないため、利用者自身で変換する必要がある。
    "\u003cspan\u003e\u003cfont color=\"black\"\u003ehand-rolled \u003c/font\u003e\u003c/span\u003e"
    
    <span><font color=\"black\">hand-rolled </font></span>
    

    本文は主に2つの問題を解決する:1、GoのJSON出力に含まれるUnicodeエスケープを解除し、元の形式で出力する;2、GoのJSONを整形(インデント)して出力する。
    go-jsonのアンエスケープ(1)
    この方法は、Marshal後のJSONテキストに含まれるエスケープを元の文字に置き換えるもので、strconv.Quote、strconv.Unquote、strings.Replaceの3つの関数を利用する。詳細は以下のソースコードを参照。
    ソースコード
    // Quote returns a double-quoted Go string literal representing s. The
    // returned string uses Go escape sequences (\t, 
    , \xFF, \u0100) for
    // control characters and non-printable characters as defined by // IsPrint. func Quote(s string) string { return quoteWith(s, '"', false, false) } func quoteWith(s string, quote byte, ASCIIonly, graphicOnly bool) string { return string(appendQuotedWith(make([]byte, 0, 3*len(s)/2), s, quote, ASCIIonly, graphicOnly)) } func appendQuotedWith(buf []byte, s string, quote byte, ASCIIonly, graphicOnly bool) []byte { // Often called with big strings, so preallocate. If there's quoting, // this is conservative but still helps a lot. if cap(buf)-len(buf) < len(s) { nBuf := make([]byte, len(buf), len(buf)+1+len(s)+1) copy(nBuf, buf) buf = nBuf } buf = append(buf, quote) for width := 0; len(s) > 0; s = s[width:] { r := rune(s[0]) width = 1 if r >= utf8.RuneSelf { r, width = utf8.DecodeRuneInString(s) } if width == 1 && r == utf8.RuneError { buf = append(buf, `\x`...) buf = append(buf, lowerhex[s[0]>>4]) buf = append(buf, lowerhex[s[0]&0xF]) continue } buf = appendEscapedRune(buf, r, quote, ASCIIonly, graphicOnly) } buf = append(buf, quote) return buf } // Unquote interprets s as a single-quoted, double-quoted, // or backquoted Go string literal, returning the string value // that s quotes. (If s is single-quoted, it would be a Go // character literal; Unquote returns the corresponding // one-character string.) func Unquote(s string) (string, error) { n := len(s) if n < 2 { return "", ErrSyntax } quote := s[0] if quote != s[n-1] { return "", ErrSyntax } s = s[1 : n-1] if quote == '`' { if contains(s, '`') { return "", ErrSyntax } if contains(s, '\r') { // -1 because we know there is at least one \r to remove. buf := make([]byte, 0, len(s)-1) for i := 0; i < len(s); i++ { if s[i] != '\r' { buf = append(buf, s[i]) } } return string(buf), nil } return s, nil } if quote != '"' && quote != '\'' { return "", ErrSyntax } if contains(s, '
    '
    ) { return "", ErrSyntax } // Is it trivial? Avoid allocation. if !contains(s, '\\') && !contains(s, quote) { switch quote { case '"': if utf8.ValidString(s) { return s, nil } case '\'': r, size := utf8.DecodeRuneInString(s) if size == len(s) && (r != utf8.RuneError || size != 1) { return s, nil } } } var runeTmp [utf8.UTFMax]byte buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations. for len(s) > 0 { c, multibyte, ss, err := UnquoteChar(s, quote) if err != nil { return "", err } s = ss if c < utf8.RuneSelf || !multibyte { buf = append(buf, byte(c)) } else { n := utf8.EncodeRune(runeTmp[:], c) buf = append(buf, runeTmp[:n]...) } if quote == '\'' && len(s) != 0 { // single-quoted must be single character return "", ErrSyntax } } return string(buf), nil }

    アンエスケープ(エスケープ解除)コード
    // UnescapeUnicodeCharactersInJSON replaces \uXXXX escape sequences in an
    // already-marshaled JSON document with the UTF-8 encoding of the character
    // they denote (for example \u003c becomes <).
    //
    // Escapes that must stay escaped for the result to remain the same valid
    // JSON are copied through untouched: control characters below U+0020, the
    // double quote (\u0022), the backslash (\u005c) and UTF-16 surrogate halves.
    // Every other backslash escape (\\, \", \n, ...) is copied as a pair, so an
    // escaped backslash followed by a literal 'u' is never mistaken for a
    // Unicode escape.
    //
    // It returns strconv.ErrSyntax when a \u is not followed by exactly four
    // hexadecimal digits; in that case the returned slice is nil.
    func UnescapeUnicodeCharactersInJSON(jsonRaw []byte) ([]byte, error) {
    	out := make([]byte, 0, len(jsonRaw))
    	for i := 0; i < len(jsonRaw); {
    		c := jsonRaw[i]
    		// Plain byte, or a lone trailing backslash: copy as is.
    		if c != '\\' || i+1 >= len(jsonRaw) {
    			out = append(out, c)
    			i++
    			continue
    		}
    		// Non-\u escape: consume both bytes so the second one is never
    		// re-interpreted as the start of another escape.
    		if jsonRaw[i+1] != 'u' {
    			out = append(out, c, jsonRaw[i+1])
    			i += 2
    			continue
    		}
    		if i+6 > len(jsonRaw) {
    			return nil, strconv.ErrSyntax
    		}
    		v, err := strconv.ParseUint(string(jsonRaw[i+2:i+6]), 16, 16)
    		if err != nil {
    			return nil, strconv.ErrSyntax
    		}
    		r := rune(v)
    		if r < 0x20 || r == '"' || r == '\\' || (r >= 0xD800 && r <= 0xDFFF) {
    			// Decoding these would break the JSON syntax or lose data.
    			out = append(out, jsonRaw[i:i+6]...)
    		} else {
    			out = append(out, string(r)...)
    		}
    		i += 6
    	}
    	return out, nil
    }
    

    テスト
    既存の処理-エスケープ出力
    // @File:    unescapeChar
    // @Version: 1.0.0
    // @Creator: JoeLang
    // @Date:    2020/5/16 12:07
    
    package main
    
    import (
    	"encoding/json"
    	"fmt"
    )
    
    // Track is the demo payload marshaled in this example. Its single field
    // is serialized under the JSON key "xmlRequest".
    type Track struct {
    	XmlRequest string `json:"xmlRequest"`
    }
    
    // main marshals a Track whose field contains XML markup and prints the value
    // before and after json.Marshal, showing that Marshal escapes < and > as
    // \u003c and \u003e by default.
    func main() {
    	// The markup is what triggers the escaping; it matches the sample output
    	// shown below the program.
    	message := &Track{XmlRequest: "<car><mirror>XML</mirror></car>"}
    	fmt.Println("Before Marshal", message)
    	messageJSON, err := json.Marshal(message)
    	if err != nil {
    		fmt.Println("marshal failed:", err)
    		return
    	}
    	fmt.Println("After marshal", string(messageJSON))
    }
    //---------output---------------
    Before Marshal &{<car><mirror>XML</mirror></car>}
    After marshal {"xmlRequest":"\u003ccar\u003e\u003cmirror\u003eXML\u003c/mirror\u003e\u003c/car\u003e"}
    

    非エスケープコード
    // main marshals a Track containing XML markup, then runs the result through
    // UnescapeUnicodeCharactersInJSON and prints both the escaped and the
    // unescaped JSON.
    func main() {
    	// The markup is what triggers the escaping; it matches the sample output
    	// shown below the program.
    	message := &Track{XmlRequest: "<car><mirror>XML</mirror></car>"}
    	fmt.Println("Before Marshal", message)
    	messageJSON, err := json.Marshal(message)
    	if err != nil {
    		fmt.Println("marshal failed:", err)
    		return
    	}
    	//-------------
    	unescapeJson, err := UnescapeUnicodeCharactersInJSON(messageJSON)
    	if err != nil {
    		fmt.Println("unescape failed:", err)
    		return
    	}
    	//-------------
    	fmt.Println("After marshal", string(messageJSON))
    	fmt.Println("After marshal", string(unescapeJson))
    }
    
    //--------output---------
    Before Marshal &{<car><mirror>XML</mirror></car>}
    After marshal {"xmlRequest":"\u003ccar\u003e\u003cmirror\u003eXML\u003c/mirror\u003e\u003c/car\u003e"}
    After marshal {"xmlRequest":"<car><mirror>XML</mirror></car>"}
    

    go-jsonのアンエスケープ(2)
    Goのencoding/jsonパッケージ(stream.go)のソースコード
    // An Encoder writes JSON values to an output stream.
    type Encoder struct {
    	w          io.Writer // destination that Encode writes the encoded bytes to
    	err        error     // set when a write fails; Encode returns it on every later call
    	escapeHTML bool      // passed to the marshaler as encOpts.escapeHTML; true by default (see NewEncoder)
    
    	indentBuf    *bytes.Buffer // scratch buffer, lazily allocated and reused by Encode when indenting
    	indentPrefix string        // prefix argument for Indent; set by SetIndent
    	indentValue  string        // indent argument for Indent; set by SetIndent
    }
    
    // NewEncoder creates an Encoder whose output goes to w. HTML escaping is
    // switched on for the new encoder.
    func NewEncoder(w io.Writer) *Encoder {
    	enc := new(Encoder)
    	enc.w = w
    	enc.escapeHTML = true
    	return enc
    }
    
    // Encode writes the JSON encoding of v to the stream,
    // followed by a newline character.
    //
    // See the documentation for Marshal for details about the
    // conversion of Go values to JSON.
    func (enc *Encoder) Encode(v interface{}) error {
    	if enc.err != nil {
    		return enc.err
    	}
    	e := newEncodeState()
    	err := e.marshal(v, encOpts{escapeHTML: enc.escapeHTML}
    	if err != nil {
    		return err
    	}
    
    	// Terminate each value with a newline.
    	// This makes the output look a little nicer
    	// when debugging, and some kind of space
    	// is required if the encoded value was a number,
    	// so that the reader knows there aren't more
    	// digits coming.
    	e.WriteByte('
    '
    ) b := e.Bytes() if enc.indentPrefix != "" || enc.indentValue != "" { if enc.indentBuf == nil { enc.indentBuf = new(bytes.Buffer) } enc.indentBuf.Reset() err = Indent(enc.indentBuf, b, enc.indentPrefix, enc.indentValue) if err != nil { return err } b = enc.indentBuf.Bytes() } if _, err = enc.w.Write(b); err != nil { enc.err = err } encodeStatePool.Put(e) return err } // SetIndent instructs the encoder to format each subsequent encoded // value as if indented by the package-level function Indent(dst, src, prefix, indent). // Calling SetIndent("", "") disables indentation. func (enc *Encoder) SetIndent(prefix, indent string) { enc.indentPrefix = prefix enc.indentValue = indent } // SetEscapeHTML specifies whether problematic HTML characters // should be escaped inside JSON quoted strings. // The default behavior is to escape &, to \u0026, \u003c, and \u003e // to avoid certain safety problems that can arise when embedding JSON in HTML. // // In non-HTML settings where the escaping interferes with the readability // of the output, SetEscapeHTML(false) disables this behavior. func (enc *Encoder) SetEscapeHTML(on bool) { enc.escapeHTML = on }
    encoding/json(stream.go)
    次の関数は汎用的に利用できる
    // UnescapeJsonMarshal encodes jsonRaw as two-space-indented JSON with HTML
    // escaping turned off, so <, > and & are written literally instead of as
    // \u003c, \u003e and \u0026. Because it goes through Encoder.Encode, the
    // returned bytes end with a newline.
    func UnescapeJsonMarshal(jsonRaw interface{}) ([]byte, error) {
    	var out bytes.Buffer
    	enc := json.NewEncoder(&out)
    	// No line prefix, two spaces per nesting level.
    	enc.SetIndent("", "  ")
    	// Keep HTML-significant characters unescaped.
    	enc.SetEscapeHTML(false)
    	encodeErr := enc.Encode(jsonRaw)
    	return out.Bytes(), encodeErr
    }
    

    テスト
    // @File:    unescapeChar
    // @Version: 1.0.0
    // @Creator: JoeLang
    // @Date:    2020/5/16 12:07
    
    package main
    
    import (
    	"bytes"
    	"encoding/json"
    	"fmt"
    	"strconv"
    	"strings"
    )
    
    // Track holds a single string that is serialized under the JSON key
    // "xmlRequest".
    type Track struct {
    	XmlRequest string `json:"xmlRequest"`
    }
    
    // Word is one quiz entry in the demo data: an index, a category label, the
    // text to display, and the list of candidate answers. The struct tags give
    // the JSON key used for each field.
    type Word struct {
    	Index           int      `json:"index"`
    	Category        string   `json:"category"`
    	ScreenText      string   `json:"screen_text"`
    	Answers         []Result `json:"answers"`
    }
    
    // Result is one candidate answer: its text and a flag marking whether it is
    // the correct one.
    type Result struct {
    	Content string `json:"content"`
    	Correct bool   `json:"correct"`
    }
    
    // WordSlice is the top-level document: a list of Word entries serialized
    // under the JSON key "items".
    type WordSlice struct {
    	Items []Word `json:"items"`
    }
    
    
    // main builds a small WordSlice whose ScreenText contains HTML markup and
    // prints it twice: once through UnescapeJsonMarshal (HTML characters kept
    // literal) and once through json.MarshalIndent (HTML characters escaped as
    // \u003c, \u003e, ...), so the two encodings can be compared.
    func main() {
    	// The markup is what makes the two encodings differ; it matches the
    	// escaped value shown in the sample output.
    	const screenText = "<span><font color=\"black\">hand-rolled </font></span>"
    
    	answer := []Result{
    		{
    			Content: " のひら",
    			Correct: true,
    		},
    		{
    			Content: " ~,~ごとに",
    			Correct: false,
    		},
    		{
    			Content: "   ",
    			Correct: false,
    		},
    		{
    			Content: "ゲル《モンゴル  で われる  のテント   》",
    			Correct: false,
    		},
    	}
    
    	item := []Word{
    		{Index: 1, Category: "read_word", ScreenText: screenText, Answers: answer},
    		{Index: 1, Category: "read_word", ScreenText: screenText, Answers: answer},
    		{Index: 1, Category: "read_word", ScreenText: screenText, Answers: answer},
    	}
    	word := WordSlice{
    		Items: item,
    	}
    
    	jsonRaw, err := UnescapeJsonMarshal(word)
    	if err != nil {
    		fmt.Println("UnescapeJsonMarshal failed:", err)
    		return
    	}
    	fmt.Println("After Escaping", string(jsonRaw))
    
    	// MarshalIndent is Marshal with indentation applied; it still escapes
    	// HTML characters.
    	jsonRaw1, err := json.MarshalIndent(word, "", "  ")
    	if err != nil {
    		fmt.Println("MarshalIndent failed:", err)
    		return
    	}
    	fmt.Println("NO Escaping", string(jsonRaw1))
    }
    
    // UnescapeJsonMarshal encodes jsonRaw as two-space-indented JSON with HTML
    // escaping turned off, so <, > and & are written literally instead of as
    // \u003c, \u003e and \u0026. Because it goes through Encoder.Encode, the
    // returned bytes end with a newline.
    func UnescapeJsonMarshal(jsonRaw interface{}) ([]byte, error) {
    	var out bytes.Buffer
    	enc := json.NewEncoder(&out)
    	// No line prefix, two spaces per nesting level.
    	enc.SetIndent("", "  ")
    	// Keep HTML-significant characters unescaped.
    	enc.SetEscapeHTML(false)
    	encodeErr := enc.Encode(jsonRaw)
    	return out.Bytes(), encodeErr
    }
    

    出力結果
    After Escaping 
    {
      "items": [
        {
          "index": 1,
          "category": "read_word",
          "screen_text": "<span><font color=\"black\">hand-rolled </font></span>",
          "answers": [
            {
              "content": " のひら",
              "correct": true
            },
            {
              "content": " ~,~ごとに",
              "correct": false
            },
            {
              "content": "   ",
              "correct": false
            },
            {
              "content": "ゲル《モンゴル  で われる  のテント   》",
              "correct": false
            }
          ]
        },
        {
          "index": 1,
          "category": "read_word",
          "screen_text": "<span><font color=\"black\">hand-rolled </font></span>",
          "answers": [
            {
              "content": " のひら",
              "correct": true
            },
            {
              "content": " ~,~ごとに",
              "correct": false
            },
            {
              "content": "   ",
              "correct": false
            },
            {
              "content": "ゲル《モンゴル  で われる  のテント   》",
              "correct": false
            }
          ]
        },
        {
          "index": 1,
          "category": "read_word",
          "screen_text": "<span><font color=\"black\">hand-rolled </font></span>",
          "answers": [
            {
              "content": " のひら",
              "correct": true
            },
            {
              "content": " ~,~ごとに",
              "correct": false
            },
            {
              "content": "   ",
              "correct": false
            },
            {
              "content": "ゲル《モンゴル  で われる  のテント   》",
              "correct": false
            }
          ]
        }
      ]
    }
    
    NO Escaping 
    {
      "items": [
        {
          "index": 1,
          "category": "read_word",
          "screen_text": "\u003cspan\u003e\u003cfont color=\"black\"\u003ehand-rolled \u003c/font\u003e\u003c/span\u003e",
          "answers": [
            {
              "content": " のひら",
              "correct": true
            },
            {
              "content": " ~,~ごとに",
              "correct": false
            },
            {
              "content": "   ",
              "correct": false
            },
            {
              "content": "ゲル《モンゴル  で われる  のテント   》",
              "correct": false
            }
          ]
        },
        {
          "index": 1,
          "category": "read_word",
          "screen_text": "\u003cspan\u003e\u003cfont color=\"black\"\u003ehand-rolled \u003c/font\u003e\u003c/span\u003e",
          "answers": [
            {
              "content": " のひら",
              "correct": true
            },
            {
              "content": " ~,~ごとに",
              "correct": false
            },
            {
              "content": "   ",
              "correct": false
            },
            {
              "content": "ゲル《モンゴル  で われる  のテント   》",
              "correct": false
            }
          ]
        },
        {
          "index": 1,
          "category": "read_word",
          "screen_text": "\u003cspan\u003e\u003cfont color=\"black\"\u003ehand-rolled \u003c/font\u003e\u003c/span\u003e",
          "answers": [
            {
              "content": " のひら",
              "correct": true
            },
            {
              "content": " ~,~ごとに",
              "correct": false
            },
            {
              "content": "   ",
              "correct": false
            },
            {
              "content": "ゲル《モンゴル  で われる  のテント   》",
              "correct": false
            }
          ]
        }
      ]
    }
    

    参考リンク
  • https://play.golang.org/p/bdqv3TUGr3
  • https://play.golang.org/p/pUsrzrrcDG-