folly/wangle/codec/LengthFieldBasedFrameDecoder.h

   1 /*
   2  * Copyright 2015 Facebook, Inc.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *   http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16 #pragma once
  17
  18 #include <folly/wangle/codec/ByteToMessageCodec.h>
  19 #include <folly/io/Cursor.h>
  20
  21 namespace folly { namespace wangle {
  22
  23 /**
  24  * A decoder that splits the received IOBufs dynamically by the
  25  * value of the length field in the message.  It is particularly useful when you
  26  * decode a binary message which has an integer header field that represents the
  27  * length of the message body or the whole message.
  28  *
  29  * LengthFieldBasedFrameDecoder has many configuration parameters so
  30  * that it can decode any message with a length field, which is often seen in
  31  * proprietary client-server protocols. Here are some examples that will give
  32  * you the basic idea on which option does what.
  33  *
  34  * 2 bytes length field at offset 0, do not strip header
  35  *
  36  * The value of the length field in this example is 12 (0x0C) which
  37  * represents the length of "HELLO, WORLD".  By default, the decoder assumes
  38  * that the length field represents the number of bytes that follow the
  39  * length field.  Therefore, it can be decoded with the simplistic parameter
  40  * combination.
  41  *
  42  * lengthFieldOffset   = 0
  43  * lengthFieldLength   = 2
  44  * lengthAdjustment    = 0
  45  * initialBytesToStrip = 0 (= do not strip header)
  46  *
  47  * BEFORE DECODE (14 bytes)         AFTER DECODE (14 bytes)
  48  * +--------+----------------+      +--------+----------------+
  49  * | Length | Actual Content |----->| Length | Actual Content |
  50  * | 0x000C | "HELLO, WORLD" |      | 0x000C | "HELLO, WORLD" |
  51  * +--------+----------------+      +--------+----------------+
  52  *
  53  *
  54  * 2 bytes length field at offset 0, strip header
  55  *
  56  * Because we can get the length of the content by calling
  57  * ioBuf->computeChainDataLength(), you might want to strip the length
  58  * field by specifying initialBytesToStrip.  In this example, we
  59  * specified 2, which is the same as the length of the length field, to
  60  * strip the first two bytes.
  61  *
  62  * lengthFieldOffset   = 0
  63  * lengthFieldLength   = 2
  64  * lengthAdjustment    = 0
  65  * initialBytesToStrip = 2 (= the length of the Length field)
  66  *
  67  * BEFORE DECODE (14 bytes)         AFTER DECODE (12 bytes)
  68  * +--------+----------------+      +----------------+
  69  * | Length | Actual Content |----->| Actual Content |
  70  * | 0x000C | "HELLO, WORLD" |      | "HELLO, WORLD" |
  71  * +--------+----------------+      +----------------+
  72  *
  73  *
  74  * 2 bytes length field at offset 0, do not strip header, the length field
  75  * represents the length of the whole message
  76  *
  77  * In most cases, the length field represents the length of the message body
  78  * only, as shown in the previous examples.  However, in some protocols, the
  79  * length field represents the length of the whole message, including the
  80  * message header.  In such a case, we specify a non-zero
  81  * lengthAdjustment.  Because the length value in this example message
  82  * is always greater than the body length by 2, we specify -2
  83  * as lengthAdjustment for compensation.
  84  *
  85  * lengthFieldOffset   =  0
  86  * lengthFieldLength   =  2
  87  * lengthAdjustment    = -2 (= the length of the Length field)
  88  * initialBytesToStrip =  0
  89  *
  90  * BEFORE DECODE (14 bytes)         AFTER DECODE (14 bytes)
  91  * +--------+----------------+      +--------+----------------+
  92  * | Length | Actual Content |----->| Length | Actual Content |
  93  * | 0x000E | "HELLO, WORLD" |      | 0x000E | "HELLO, WORLD" |
  94  * +--------+----------------+      +--------+----------------+
  95  *
  96  *
  97  * 3 bytes length field at the end of 5 bytes header, do not strip header
  98  *
  99  * The following message is a simple variation of the first example.  An extra
 100  * header value is prepended to the message.  lengthAdjustment is zero
 101  * again because the decoder always takes the length of the prepended data into
 102  * account during frame length calculation.
 103  *
 104  * lengthFieldOffset   = 2 (= the length of Header 1)
 105  * lengthFieldLength   = 3
 106  * lengthAdjustment    = 0
 107  * initialBytesToStrip = 0
 108  *
 109  * BEFORE DECODE (17 bytes)                      AFTER DECODE (17 bytes)
 110  * +----------+----------+----------------+      +----------+----------+----------------+
 111  * | Header 1 |  Length  | Actual Content |----->| Header 1 |  Length  | Actual Content |
 112  * |  0xCAFE  | 0x00000C | "HELLO, WORLD" |      |  0xCAFE  | 0x00000C | "HELLO, WORLD" |
 113  * +----------+----------+----------------+      +----------+----------+----------------+
 114  *
 115  *
 116  * 3 bytes length field at the beginning of 5 bytes header, do not strip header
 117  *
 118  * This is an advanced example that shows the case where there is an extra
 119  * header between the length field and the message body.  You have to specify a
 120  * positive lengthAdjustment so that the decoder counts the extra
 121  * header into the frame length calculation.
 122  *
 123  * lengthFieldOffset   = 0
 124  * lengthFieldLength   = 3
 125  * lengthAdjustment    = 2 (= the length of Header 1)
 126  * initialBytesToStrip = 0
 127  *
 128  * BEFORE DECODE (17 bytes)                      AFTER DECODE (17 bytes)
 129  * +----------+----------+----------------+      +----------+----------+----------------+
 130  * |  Length  | Header 1 | Actual Content |----->|  Length  | Header 1 | Actual Content |
 131  * | 0x00000C |  0xCAFE  | "HELLO, WORLD" |      | 0x00000C |  0xCAFE  | "HELLO, WORLD" |
 132  * +----------+----------+----------------+      +----------+----------+----------------+
 133  *
 134  *
 135  * 2 bytes length field at offset 1 in the middle of 4 bytes header,
 136  *     strip the first header field and the length field
 137  *
 138  * This is a combination of all the examples above.  There is a prepended
 139  * header before the length field and an extra header after the length field.
 140  * The prepended header affects the lengthFieldOffset and the extra
 141  * header affects the lengthAdjustment.  We also specified a non-zero
 142  * initialBytesToStrip to strip the length field and the prepended
 143  * header from the frame.  If you don't want to strip the prepended header, you
 144  * could specify 0 for initialBytesToStrip.
 145  *
 146  * lengthFieldOffset   = 1 (= the length of HDR1)
 147  * lengthFieldLength   = 2
 148  * lengthAdjustment    = 1 (= the length of HDR2)
 149  * initialBytesToStrip = 3 (= the length of HDR1 + LEN)
 150  *
 151  * BEFORE DECODE (16 bytes)                       AFTER DECODE (13 bytes)
 152  * +------+--------+------+----------------+      +------+----------------+
 153  * | HDR1 | Length | HDR2 | Actual Content |----->| HDR2 | Actual Content |
 154  * | 0xCA | 0x000C | 0xFE | "HELLO, WORLD" |      | 0xFE | "HELLO, WORLD" |
 155  * +------+--------+------+----------------+      +------+----------------+
 156  *
 157  *
 158  * 2 bytes length field at offset 1 in the middle of 4 bytes header,
 159  *     strip the first header field and the length field, the length field
 160  *     represents the length of the whole message
 161  *
 162  * Let's give another twist to the previous example.  The only difference from
 163  * the previous example is that the length field represents the length of the
 164  * whole message instead of the message body, just like the third example.
 165  * We have to count the length of HDR1 and Length into lengthAdjustment.
 166  * Please note that we don't need to take the length of HDR2 into account
 167  * because the length field already includes the whole header length.
 168  *
 169  * lengthFieldOffset   =  1
 170  * lengthFieldLength   =  2
 171  * lengthAdjustment    = -3 (= the length of HDR1 + LEN, negative)
 172  * initialBytesToStrip =  3
 173  *
 174  * BEFORE DECODE (16 bytes)                       AFTER DECODE (13 bytes)
 175  * +------+--------+------+----------------+      +------+----------------+
 176  * | HDR1 | Length | HDR2 | Actual Content |----->| HDR2 | Actual Content |
 177  * | 0xCA | 0x0010 | 0xFE | "HELLO, WORLD" |      | 0xFE | "HELLO, WORLD" |
 178  * +------+--------+------+----------------+      +------+----------------+
 179  *
 180  * @see LengthFieldPrepender
 181  */
 182 class LengthFieldBasedFrameDecoder : public ByteToMessageCodec {
 183  public:
 184   LengthFieldBasedFrameDecoder(
 185     uint32_t lengthFieldLength = 4,
 186     uint32_t maxFrameLength = UINT_MAX,
 187     uint32_t lengthFieldOffset = 0,
 188     uint32_t lengthAdjustment = 0,
 189     uint32_t initialBytesToStrip = 4,
 190     bool networkByteOrder = true);
 191
 192   std::unique_ptr<IOBuf> decode(Context* ctx, IOBufQueue& buf, size_t&);
 193
 194  private:
 195
 196   uint64_t getUnadjustedFrameLength(
 197     IOBufQueue& buf, int offset, int length, bool networkByteOrder);
 198
 199   uint32_t lengthFieldLength_;
 200   uint32_t maxFrameLength_;
 201   uint32_t lengthFieldOffset_;
 202   uint32_t lengthAdjustment_;
 203   uint32_t initialBytesToStrip_;
 204   bool networkByteOrder_;
 205
 206   uint32_t lengthFieldEndOffset_;
 207 };
 208
 209 }} // namespace