2 LZ4 HC Encoder - Part of LZ4 HC algorithm
\r
3 Copyright (C) 2011-2013, Yann Collet.
\r
4 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
\r
6 Redistribution and use in source and binary forms, with or without
\r
7 modification, are permitted provided that the following conditions are
\r
10 * Redistributions of source code must retain the above copyright
\r
11 notice, this list of conditions and the following disclaimer.
\r
12 * Redistributions in binary form must reproduce the above
\r
13 copyright notice, this list of conditions and the following disclaimer
\r
14 in the documentation and/or other materials provided with the
\r
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
\r
18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
\r
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
\r
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
\r
21 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
\r
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
\r
23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
\r
24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
\r
25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
\r
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
\r
27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\r
29 You can contact the author at :
\r
30 - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
\r
31 - LZ4 source repository : http://code.google.com/p/lz4/
\r
34 /* lz4hc_encoder.h must be included into lz4hc.c
\r
35 The objective of this file is to create a single LZ4 compression function source
\r
36 which will be instanciated multiple times with minor variations
\r
37 depending on a set of #define.
\r
40 //****************************
\r
41 // Check required defines
\r
42 //****************************
\r
44 #ifndef FUNCTION_NAME
\r
45 # error "FUNTION_NAME is not defined"
\r
49 //****************************
\r
50 // Local definitions
\r
51 //****************************
\r
52 #define COMBINED_NAME_RAW(n1,n2) n1 ## n2
\r
53 #define COMBINED_NAME(n1,n2) COMBINED_NAME_RAW(n1,n2)
\r
54 #define ENCODE_SEQUENCE_NAME COMBINED_NAME(FUNCTION_NAME,_encodeSequence)
\r
55 #ifdef LIMITED_OUTPUT
\r
56 # define ENCODE_SEQUENCE(i,o,a,m,r,d) if (ENCODE_SEQUENCE_NAME(i,o,a,m,r,d)) return 0;
\r
58 # define ENCODE_SEQUENCE(i,o,a,m,r,d) ENCODE_SEQUENCE_NAME(i,o,a,m,r)
\r
61 //****************************
\r
63 //****************************
\r
65 forceinline static int ENCODE_SEQUENCE_NAME (
\r
68 const BYTE** anchor,
\r
71 #ifdef LIMITED_OUTPUT
\r
79 // Encode Literal length
\r
80 length = (int)(*ip - *anchor);
\r
82 #ifdef LIMITED_OUTPUT
\r
83 if ((*op + length + (2 + 1 + LASTLITERALS) + (length>>8)) > oend) return 1; // Check output limit
\r
85 if (length>=(int)RUN_MASK) { *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *(*op)++ = 255; *(*op)++ = (BYTE)len; }
\r
86 else *token = (BYTE)(length<<ML_BITS);
\r
89 LZ4_BLINDCOPY(*anchor, *op, length);
\r
92 LZ4_WRITE_LITTLEENDIAN_16(*op,(U16)(*ip-ref));
\r
94 // Encode MatchLength
\r
95 length = (int)(matchLength-MINMATCH);
\r
96 #ifdef LIMITED_OUTPUT
\r
97 if (*op + (1 + LASTLITERALS) + (length>>8) > oend) return 1; // Check output limit
\r
99 if (length>=(int)ML_MASK) { *token+=ML_MASK; length-=ML_MASK; for(; length > 509 ; length-=510) { *(*op)++ = 255; *(*op)++ = 255; } if (length > 254) { length-=255; *(*op)++ = 255; } *(*op)++ = (BYTE)length; }
\r
100 else *token += (BYTE)(length);
\r
102 // Prepare next loop
\r
103 *ip += matchLength;
\r
110 int COMBINED_NAME(FUNCTION_NAME,_continue) (
\r
112 const char* source,
\r
115 #ifdef LIMITED_OUTPUT
\r
120 LZ4HC_Data_Structure* ctx = (LZ4HC_Data_Structure*) ctxvoid;
\r
121 const BYTE* ip = (const BYTE*) source;
\r
122 const BYTE* anchor = ip;
\r
123 const BYTE* const iend = ip + inputSize;
\r
124 const BYTE* const mflimit = iend - MFLIMIT;
\r
125 const BYTE* const matchlimit = (iend - LASTLITERALS);
\r
127 BYTE* op = (BYTE*) dest;
\r
128 #ifdef LIMITED_OUTPUT
\r
129 BYTE* const oend = op + maxOutputSize;
\r
132 int ml, ml2, ml3, ml0;
\r
133 const BYTE* ref=NULL;
\r
134 const BYTE* start2=NULL;
\r
135 const BYTE* ref2=NULL;
\r
136 const BYTE* start3=NULL;
\r
137 const BYTE* ref3=NULL;
\r
138 const BYTE* start0;
\r
141 // Ensure blocks follow each other
\r
142 if (ip != ctx->end) return 0;
\r
143 ctx->end += inputSize;
\r
148 while (ip < mflimit)
\r
150 ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref));
\r
151 if (!ml) { ip++; continue; }
\r
153 // saved, in case we would skip too much
\r
159 if (ip+ml < mflimit)
\r
160 ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 1, matchlimit, ml, &ref2, &start2);
\r
163 if (ml2 == ml) // No better match
\r
165 ENCODE_SEQUENCE(&ip, &op, &anchor, ml, ref, oend);
\r
171 if (start2 < ip + ml0) // empirical
\r
179 // Here, start0==ip
\r
180 if ((start2 - ip) < 3) // First Match too small : removed
\r
189 // Currently we have :
\r
191 // ip1+3 <= ip2 (usually < ip1+ml1)
\r
192 if ((start2 - ip) < OPTIMAL_ML)
\r
196 if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML;
\r
197 if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
\r
198 correction = new_ml - (int)(start2 - ip);
\r
199 if (correction > 0)
\r
201 start2 += correction;
\r
202 ref2 += correction;
\r
206 // Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18)
\r
208 if (start2 + ml2 < mflimit)
\r
209 ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3);
\r
212 if (ml3 == ml2) // No better match : 2 sequences to encode
\r
214 // ip & ref are known; Now for ml
\r
215 if (start2 < ip+ml) ml = (int)(start2 - ip);
\r
216 // Now, encode 2 sequences
\r
217 ENCODE_SEQUENCE(&ip, &op, &anchor, ml, ref, oend);
\r
219 ENCODE_SEQUENCE(&ip, &op, &anchor, ml2, ref2, oend);
\r
223 if (start3 < ip+ml+3) // Not enough space for match 2 : remove it
\r
225 if (start3 >= (ip+ml)) // can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1
\r
227 if (start2 < ip+ml)
\r
229 int correction = (int)(ip+ml - start2);
\r
230 start2 += correction;
\r
231 ref2 += correction;
\r
233 if (ml2 < MINMATCH)
\r
241 ENCODE_SEQUENCE(&ip, &op, &anchor, ml, ref, oend);
\r
258 // OK, now we have 3 ascending matches; let's write at least the first one
\r
259 // ip & ref are known; Now for ml
\r
260 if (start2 < ip+ml)
\r
262 if ((start2 - ip) < (int)ML_MASK)
\r
265 if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
\r
266 if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
\r
267 correction = ml - (int)(start2 - ip);
\r
268 if (correction > 0)
\r
270 start2 += correction;
\r
271 ref2 += correction;
\r
277 ml = (int)(start2 - ip);
\r
280 ENCODE_SEQUENCE(&ip, &op, &anchor, ml, ref, oend);
\r
294 // Encode Last Literals
\r
296 int lastRun = (int)(iend - anchor);
\r
297 #ifdef LIMITED_OUTPUT
\r
298 if (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize) return 0; // Check output limit
\r
300 if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
\r
301 else *op++ = (BYTE)(lastRun<<ML_BITS);
\r
302 memcpy(op, anchor, iend - anchor);
\r
307 return (int) (((char*)op)-dest);
\r
311 int FUNCTION_NAME (const char* source,
\r
314 #ifdef LIMITED_OUTPUT
\r
319 void* ctx = LZ4_createHC(source);
\r
321 if (ctx==NULL) return 0;
\r
322 #ifdef LIMITED_OUTPUT
\r
323 result = COMBINED_NAME(FUNCTION_NAME,_continue) (ctx, source, dest, inputSize, maxOutputSize);
\r
325 result = COMBINED_NAME(FUNCTION_NAME,_continue) (ctx, source, dest, inputSize);
\r
333 //****************************
\r
335 //****************************
\r
337 // Required defines
\r
338 #undef FUNCTION_NAME
\r
340 // Locally Generated
\r
341 #undef ENCODE_SEQUENCE
\r
342 #undef ENCODE_SEQUENCE_NAME
\r
344 // Optional defines
\r
345 #ifdef LIMITED_OUTPUT
\r
346 #undef LIMITED_OUTPUT
\r