The Will Will Web

記載著 Will 在網路世界的學習心得與技術分享

Quoted Printable 編碼與解碼

之前曾經有一次需要將郵件主旨(Subject)解碼的經驗,但主旨卻用 Quoted Printable 編碼過了,所以上網找了個 Quoted Printable Encoding & Decoding 的範例程式來看,但下載回來卻不能正常解碼(Decode),經我一番修改後才可以正常解碼。

using System;
using System.Collections;
using System.Text;

/// <summary>
/// Class for encoding and decoding a string to QuotedPrintable
/// RFC 1521 http://www.ietf.org/rfc/rfc1521.txt
/// RFC 2045 http://www.ietf.org/rfc/rfc2045.txt
/// Date: 2006-03-23
/// Author: Kevin Spaun
/// Company: SPAUN Informationstechnik GmbH - http://www.spaun-it.com/
/// Feedback: kspaun@spaun-it.de
/// License: This piece of code comes with no guaranties. You can use it for whatever you want for free.
///
/// Modified by Will Huang ( http://blog.miniasp.com )
/// Modified at 2008-02-13
///
/// </summary>
public class QuotedPrintable
{
    private const byte EQUALS = 61;
    private const byte CR = 13;
    private const byte LF = 10;
    private const byte SPACE = 32;

    // RFC 2045 (section 6.7, rule 5): an encoded line may be at most 76
    // characters long, counting the "=" of a trailing soft line break.
    private const int MAX_LINE_LENGTH = 76;

    /// <summary>
    /// Encodes a string to QuotedPrintable using <see cref="Encoding.Default"/>
    /// to turn the characters into bytes.
    /// </summary>
    /// <param name="_ToEncode">String to encode</param>
    /// <returns>QuotedPrintable encoded string</returns>
    /// <exception cref="ArgumentNullException"><paramref name="_ToEncode"/> is null.</exception>
    public static string Encode(string _ToEncode)
    {
        return Encode(_ToEncode, Encoding.Default);
    }

    /// <summary>
    /// Encodes a string to QuotedPrintable using the given character encoding.
    /// </summary>
    /// <remarks>
    /// Bytes below 33 (other than CR, LF and SPACE), bytes above 126 and the
    /// "=" sign are written as "=XX". CR and LF are copied through unchanged.
    /// A SPACE is written literally unless it is the last byte of a line or of
    /// the input, in which case it is written as "=20". Soft line breaks
    /// ("=" followed by CRLF) are inserted so that no output line exceeds
    /// 76 characters.
    /// </remarks>
    /// <param name="_ToEncode">String to encode</param>
    /// <param name="encoding">Encoding used to convert the string to bytes</param>
    /// <returns>QuotedPrintable encoded string</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="_ToEncode"/> or <paramref name="encoding"/> is null.
    /// </exception>
    public static string Encode(string _ToEncode, Encoding encoding)
    {
        if (_ToEncode == null)
        {
            throw new ArgumentNullException("_ToEncode");
        }
        if (encoding == null)
        {
            throw new ArgumentNullException("encoding");
        }

        byte[] bytes = encoding.GetBytes(_ToEncode);
        StringBuilder encoded = new StringBuilder(bytes.Length);

        // Number of characters already written to the current output line.
        int lineLength = 0;

        for (int i = 0; i < bytes.Length; i++)
        {
            byte current = bytes[i];

            // Hard line breaks are copied through and start a new output line.
            if (current == CR || current == LF)
            {
                encoded.Append(System.Convert.ToChar(current));
                lineLength = 0;
                continue;
            }

            bool mustEscape;
            if (current == SPACE)
            {
                // A SPACE may stay literal unless it would end a line:
                // trailing whitespace is not allowed on an encoded line.
                mustEscape = (i + 1 == bytes.Length) || bytes[i + 1] == CR || bytes[i + 1] == LF;
            }
            else
            {
                // Control bytes (TAB included), 8-bit bytes and "=" itself.
                mustEscape = current < 33 || current > 126 || current == EQUALS;
            }

            string token = mustEscape
                ? "=" + current.ToString("X2", System.Globalization.CultureInfo.InvariantCulture)
                : System.Convert.ToChar(current).ToString();

            // Keep one column free for the "=" of the soft line break, and
            // never split an "=XX" token across two lines.
            if (lineLength + token.Length > MAX_LINE_LENGTH - 1)
            {
                encoded.Append("=\r\n"); //insert soft-linebreak
                lineLength = 0;
            }

            encoded.Append(token);
            lineLength += token.Length;
        }

        return encoded.ToString();
    }

    /// <summary>
    /// Decodes a QuotedPrintable encoded string using <see cref="Encoding.Default"/>
    /// to turn the decoded bytes back into characters.
    /// </summary>
    /// <param name="_ToDecode">The encoded string to decode</param>
    /// <returns>Decoded string</returns>
    /// <exception cref="ArgumentNullException"><paramref name="_ToDecode"/> is null.</exception>
    /// <exception cref="OverflowException">
    /// <paramref name="_ToDecode"/> contains a literal character above U+00FF.
    /// </exception>
    public static string Decode(string _ToDecode)
    {
        return Decode(_ToDecode, Encoding.Default);
    }

    /// <summary>
    /// Decodes a QuotedPrintable encoded string using the given character encoding.
    /// </summary>
    /// <remarks>
    /// Soft line breaks ("=" followed by CRLF or by a bare LF) are removed.
    /// An "=" followed by two hexadecimal digits becomes the corresponding
    /// byte. Any other "=" (for example at the very end of the input, or
    /// followed by non-hex characters) is copied through unchanged instead of
    /// raising an error, which is the lenient handling RFC 2045 suggests for
    /// malformed input.
    /// </remarks>
    /// <param name="_ToDecode">The encoded string to decode</param>
    /// <param name="encoding">Encoding used to convert the decoded bytes to a string</param>
    /// <returns>Decoded string</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="_ToDecode"/> or <paramref name="encoding"/> is null.
    /// </exception>
    /// <exception cref="OverflowException">
    /// <paramref name="_ToDecode"/> contains a literal character above U+00FF.
    /// </exception>
    public static string Decode(string _ToDecode, Encoding encoding)
    {
        if (_ToDecode == null)
        {
            throw new ArgumentNullException("_ToDecode");
        }
        if (encoding == null)
        {
            throw new ArgumentNullException("encoding");
        }

        //remove soft-linebreaks first (CRLF form, then the LF-only form that
        //appears when line endings were normalised before decoding)
        string text = _ToDecode.Replace("=\r\n", "").Replace("=\n", "");

        // Every output byte consumes at least one input character, so the
        // input length is a safe upper bound for the buffer.
        byte[] bytes = new byte[text.Length];
        int bytesCount = 0;

        for (int i = 0; i < text.Length; i++)
        {
            char current = text[i];

            if (current == '=' && i + 2 < text.Length)
            {
                int high = HexValue(text[i + 1]);
                int low = HexValue(text[i + 2]);

                // Only a strict two-hex-digit escape is decoded; whitespace or
                // other characters after "=" leave the "=" as literal text.
                if (high >= 0 && low >= 0)
                {
                    bytes[bytesCount++] = (byte)((high << 4) | low);
                    i += 2;
                    continue;
                }
            }

            bytes[bytesCount++] = System.Convert.ToByte(current);
        }

        return encoding.GetString(bytes, 0, bytesCount);
    }

    /// <summary>
    /// Returns the numeric value (0-15) of a hexadecimal digit, or -1 when the
    /// character is not a hexadecimal digit.
    /// </summary>
    /// <param name="c">Character to convert</param>
    /// <returns>Value of the digit, or -1</returns>
    private static int HexValue(char c)
    {
        if (c >= '0' && c <= '9')
        {
            return c - '0';
        }
        if (c >= 'A' && c <= 'F')
        {
            return c - 'A' + 10;
        }
        if (c >= 'a' && c <= 'f')
        {
            return c - 'a' + 10;
        }
        return -1;
    }
}

其實用 Quoted Printable 編碼、解碼很不安心、且速度也很慢(如果你拿來跟Base64比較的話),因為除非你知道你編碼(Encode)的時候用的是什麼字集編碼(Encoding),否則很容易在解碼(Decode)的時候出現亂碼!例如說我用的是「繁體中文」的作業系統,其 System.Text.Encoding.Default 就是 BIG5,但如果你做 Encode 的時候的編碼跟 Decode 時候的編碼不一致時,那就會解碼失敗了。

所以以上的程式如果要改用 UTF-8 來編碼、解碼的話,只要把程式中的 Encoding.Default 改成 Encoding.UTF8 就可以了。

留言評論