Merge commit 'df26c08a34b4e07235d33fcd7e2fb311d83f069f' into HEAD

Synchronize LZ4 with upstream
2025-12-06 17:27:55 +00:00 · 2015-05-29 15:03:27 -07:00
parent 6675ee7f5c df26c08a34
commit 4ff845ac91
62 changed files with 6254 additions and 2490 deletions
--- a/src/lz4/examples/Makefile
+++ b/src/lz4/examples/Makefile
@@ -1,6 +1,7 @@
 # ##########################################################################
 # LZ4 examples - Makefile
 # Copyright (C) Yann Collet 2011-2014
+#
 # GPL v2 License
 #
 # This program is free software; you can redistribute it and/or modify
@@ -21,29 +22,17 @@
 #  - LZ4 source repository : http://code.google.com/p/lz4/
 #  - LZ4 forum froup : https://groups.google.com/forum/#!forum/lz4c
 # ##########################################################################
-# lz4 : Command Line Utility, supporting gzip-like arguments
-# lz4c  : CLU, supporting also legacy lz4demo arguments
-# lz4c32: Same as lz4c, but forced to compile in 32-bits mode
-# fuzzer  : Test tool, to check lz4 integrity on target platform
-# fuzzer32: Same as fuzzer, but forced to compile in 32-bits mode
-# fullbench  : Precisely measure speed for each LZ4 function variant
-# fullbench32: Same as fullbench, but forced to compile in 32-bits mode
+# This makefile compiles and tests
+# example programs, using (mostly) LZ4 streaming library,
+# kindly provided by Takayuki Matsuoka
 # ##########################################################################

-CC     := $(CC)
 CFLAGS ?= -O3
-CFLAGS += -std=c99 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Wstrict-prototypes -Wno-missing-braces   # Wno-missing-braces required due to GCC <4.8.3 bug
-FLAGS   = -I../lib $(CPPFLAGS) $(CFLAGS) $(LDFLAGS)
+CFLAGS += -std=c99 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Wstrict-prototypes
+FLAGS  := -I../lib $(CPPFLAGS) $(CFLAGS) $(LDFLAGS)

 TESTFILE= Makefile
-LZ4DIR  = ../lib
-
-
-# Minimize test target for Travis CI's Build Matrix
-ifeq ($(LZ4_TRAVIS_CI_ENV),-m32)
-CFLAGS += -m32
-else ifeq ($(LZ4_TRAVIS_CI_ENV),-m64)
-endif
+LZ4DIR := ../lib


 # Define *.exe as extension for Windows systems
--- a/src/lz4/examples/README.md
+++ b/src/lz4/examples/README.md
@@ -0,0 +1,8 @@
+# LZ4 examples
+
+## Documents
+
+ - [Streaming API Basics](streaming_api_basics.md)
+ - Examples
+     - [Double Buffer](blockStreaming_doubleBuffer.md)
+     - [Line by Line Text Compression](blockStreaming_lineByLine.md)
--- a/src/lz4/examples/blockStreaming_doubleBuffer.c
+++ b/src/lz4/examples/blockStreaming_doubleBuffer.c
@@ -2,7 +2,10 @@
 // Copyright : Takayuki Matsuoka


-#define _CRT_SECURE_NO_WARNINGS // for MSVC
+#ifdef _MSC_VER    /* Visual Studio */
+#  define _CRT_SECURE_NO_WARNINGS
+#  define snprintf sprintf_s
+#endif
 #include "lz4.h"

 #include <stdio.h>
@@ -35,11 +38,13 @@ size_t read_bin(FILE* fp, void* array, size_t arrayBytes) {

 void test_compress(FILE* outFp, FILE* inpFp)
 {
-    LZ4_stream_t lz4Stream_body = { 0 };
+    LZ4_stream_t lz4Stream_body;
    LZ4_stream_t* lz4Stream = &lz4Stream_body;

    char inpBuf[2][BLOCK_BYTES];
    int  inpBufIndex = 0;
+    
+    LZ4_resetStream(lz4Stream);

    for(;;) {
        char* const inpPtr = inpBuf[inpBufIndex];
@@ -50,8 +55,8 @@ void test_compress(FILE* outFp, FILE* inpFp)

        {
            char cmpBuf[LZ4_COMPRESSBOUND(BLOCK_BYTES)];
-            const int cmpBytes = LZ4_compress_continue(
-                lz4Stream, inpPtr, cmpBuf, inpBytes);
+            const int cmpBytes = LZ4_compress_fast_continue(
+                lz4Stream, inpPtr, cmpBuf, inpBytes, sizeof(cmpBuf), 1);
            if(cmpBytes <= 0) {
                break;
            }
@@ -68,12 +73,14 @@ void test_compress(FILE* outFp, FILE* inpFp)

 void test_decompress(FILE* outFp, FILE* inpFp)
 {
-    LZ4_streamDecode_t lz4StreamDecode_body = { 0 };
+    LZ4_streamDecode_t lz4StreamDecode_body;
    LZ4_streamDecode_t* lz4StreamDecode = &lz4StreamDecode_body;

    char decBuf[2][BLOCK_BYTES];
    int  decBufIndex = 0;

+    LZ4_setStreamDecode(lz4StreamDecode, NULL, 0);
+
    for(;;) {
        char cmpBuf[LZ4_COMPRESSBOUND(BLOCK_BYTES)];
        int  cmpBytes = 0;
--- a/src/lz4/examples/blockStreaming_doubleBuffer.md
+++ b/src/lz4/examples/blockStreaming_doubleBuffer.md
@@ -0,0 +1,100 @@
+# LZ4 Streaming API Example : Double Buffer
+by *Takayuki Matsuoka*
+
+`blockStreaming_doubleBuffer.c` is an LZ4 Streaming API example which implements double buffer (de)compression.
+
+Please note :
+
+ - Firstly, read "LZ4 Streaming API Basics".
+ - This is relatively advanced application example.
+ - Output file is not compatible with lz4frame and platform dependent.
+
+
+## What's the point of this example ?
+
+ - Handle huge file in small amount of memory
+ - Always better compression ratio than Block API
+ - Uniform block size
+
+
+## How the compression works
+
+First of all, allocate "Double Buffer" for input and LZ4 compressed data buffer for output.
+Double buffer has two pages, "first" page (Page#1) and "second" page (Page#2).
+
+```
+        Double Buffer
+
+      Page#1    Page#2
+    +---------+---------+
+    | Block#1 |         |
+    +----+----+---------+
+         |
+         v
+      {Out#1}
+
+
+      Prefix Dependency
+         +---------+
+         |         |
+         v         |
+    +---------+----+----+
+    | Block#1 | Block#2 |
+    +---------+----+----+
+                   |
+                   v
+                {Out#2}
+
+
+   External Dictionary Mode
+         +---------+
+         |         |
+         |         v
+    +----+----+---------+
+    | Block#3 | Block#2 |
+    +----+----+---------+
+         |
+         v
+      {Out#3}
+
+
+      Prefix Dependency
+         +---------+
+         |         |
+         v         |
+    +---------+----+----+
+    | Block#3 | Block#4 |
+    +---------+----+----+
+                   |
+                   v
+                {Out#4}
+```
+
+Next, read the first block into the double buffer's first page, and compress it with `LZ4_compress_fast_continue()`.
+The first time, LZ4 doesn't know any previous dependencies,
+so it just compresses the block without dependencies and writes compressed block {Out#1} to the LZ4 compressed data buffer.
+After that, write {Out#1} to the file.
+
+Next, read second block to double buffer's second page. And compress it.
+In this time, LZ4 can use dependency to Block#1 to improve compression ratio.
+This dependency is called "Prefix mode".
+
+Next, read third block to double buffer's *first* page. And compress it.
+Also this time, LZ4 can use dependency to Block#2.
+This dependency is called "External Dictionary mode".
+
+Continue this procedure to the end of the file.
+
+
+## How the decompression works
+
+Decompression performs the reverse operation, processing the blocks in the same order.
+
+ - Read first compressed block.
+ - Decompress it to the first page and write that page to the file.
+ - Read second compressed block.
+ - Decompress it to the second page and write that page to the file.
+ - Read third compressed block.
+ - Decompress it to the *first* page and write that page to the file.
+
+Continue this procedure to the end of the compressed file.
--- a/src/lz4/examples/blockStreaming_lineByLine.c
+++ b/src/lz4/examples/blockStreaming_lineByLine.c
@@ -2,7 +2,10 @@
 // Copyright : Takayuki Matsuoka


-#define _CRT_SECURE_NO_WARNINGS // for MSVC
+#ifdef _MSC_VER    /* Visual Studio */
+#  define _CRT_SECURE_NO_WARNINGS
+#  define snprintf sprintf_s
+#endif
 #include "lz4.h"

 #include <stdio.h>
@@ -38,8 +41,9 @@ static void test_compress(
    size_t ringBufferBytes)
 {
    LZ4_stream_t* const lz4Stream = LZ4_createStream();
-    char* const cmpBuf = malloc(LZ4_COMPRESSBOUND(messageMaxBytes));
-    char* const inpBuf = malloc(ringBufferBytes);
+    const size_t cmpBufBytes = LZ4_COMPRESSBOUND(messageMaxBytes);
+    char* const cmpBuf = (char*) malloc(cmpBufBytes);
+    char* const inpBuf = (char*) malloc(ringBufferBytes);
    int inpOffset = 0;

    for ( ; ; )
@@ -60,8 +64,8 @@ static void test_compress(
 #endif

        {
-            const int cmpBytes = LZ4_compress_continue(
-                lz4Stream, inpPtr, cmpBuf, inpBytes);
+            const int cmpBytes = LZ4_compress_fast_continue(
+                lz4Stream, inpPtr, cmpBuf, inpBytes, cmpBufBytes, 1);
            if (cmpBytes <= 0) break;
            write_uint16(outFp, (uint16_t) cmpBytes);
            write_bin(outFp, cmpBuf, cmpBytes);
@@ -86,8 +90,8 @@ static void test_decompress(
    size_t ringBufferBytes)
 {
    LZ4_streamDecode_t* const lz4StreamDecode = LZ4_createStreamDecode();
-    char* const cmpBuf = malloc(LZ4_COMPRESSBOUND(messageMaxBytes));
-    char* const decBuf = malloc(ringBufferBytes);
+    char* const cmpBuf = (char*) malloc(LZ4_COMPRESSBOUND(messageMaxBytes));
+    char* const decBuf = (char*) malloc(ringBufferBytes);
    int decOffset = 0;

    for ( ; ; )
@@ -121,8 +125,8 @@ static int compare(FILE* f0, FILE* f1)
 {
    int result = 0;
    const size_t tempBufferBytes = 65536;
-    char* const b0 = malloc(tempBufferBytes);
-    char* const b1 = malloc(tempBufferBytes);
+    char* const b0 = (char*) malloc(tempBufferBytes);
+    char* const b1 = (char*) malloc(tempBufferBytes);

    while(0 == result)
    {
--- a/src/lz4/examples/blockStreaming_lineByLine.md
+++ b/src/lz4/examples/blockStreaming_lineByLine.md
@@ -0,0 +1,122 @@
+# LZ4 Streaming API Example : Line by Line Text Compression
+by *Takayuki Matsuoka*
+
+`blockStreaming_lineByLine.c` is an LZ4 Streaming API example which implements line by line incremental (de)compression.
+
+Please note the following restrictions :
+
+ - Firstly, read "LZ4 Streaming API Basics".
+ - This is relatively advanced application example.
+ - Output file is not compatible with lz4frame and platform dependent.
+
+
+## What's the point of this example ?
+
+ - Line by line incremental (de)compression.
+ - Handle huge file in small amount of memory
+ - Generally better compression ratio than Block API
+ - Non-uniform block size
+
+
+## How the compression works
+
+First of all, allocate "Ring Buffer" for input and LZ4 compressed data buffer for output.
+
+```
+(1)
+    Ring Buffer
+
+    +--------+
+    | Line#1 |
+    +---+----+
+        |
+        v
+     {Out#1}
+
+
+(2)
+    Prefix Mode Dependency
+          +----+
+          |    |
+          v    |
+    +--------+-+------+
+    | Line#1 | Line#2 |
+    +--------+---+----+
+                 |
+                 v
+              {Out#2}
+
+
+(3)
+          Prefix   Prefix
+          +----+   +----+
+          |    |   |    |
+          v    |   v    |
+    +--------+-+------+-+------+
+    | Line#1 | Line#2 | Line#3 |
+    +--------+--------+---+----+
+                          |
+                          v
+                       {Out#3}
+
+
+(4)
+                        External Dictionary Mode
+                +----+   +----+
+                |    |   |    |
+                v    |   v    |
+    ------+--------+-+------+-+--------+
+          |  ....  | Line#X | Line#X+1 |
+    ------+--------+--------+-----+----+
+                            ^     |
+                            |     v
+                            |  {Out#X+1}
+                            |
+                          Reset
+
+
+(5)
+                                    Prefix
+                                    +-----+
+                                    |     |
+                                    v     |
+    ------+--------+--------+----------+--+-------+
+          |  ....  | Line#X | Line#X+1 | Line#X+2 |
+    ------+--------+--------+----------+-----+----+
+                            ^                |
+                            |                v
+                            |            {Out#X+2}
+                            |
+                          Reset
+```
+
+Next (see (1)), read the first line into the ringbuffer and compress it with `LZ4_compress_fast_continue()`.
+The first time, LZ4 doesn't know any previous dependencies,
+so it just compresses the line without dependencies and writes compressed line {Out#1} to the LZ4 compressed data buffer.
+After that, write {Out#1} to the file and advance the ringbuffer offset.
+
+Do the same things to second line (see (2)).
+But in this time, LZ4 can use dependency to Line#1 to improve compression ratio.
+This dependency is called "Prefix mode".
+
+Eventually, we'll reach the end of the ringbuffer at Line#X (see (4)).
+At this point, we should reset the ringbuffer offset.
+After resetting, the pointer for Line#X+1 is not adjacent to the previous data, but LZ4 still keeps its memory of that data.
+This is called "External Dictionary Mode".
+
+At Line#X+2 (see (5)), LZ4 finally forgets almost all of its earlier memory, but it still retains Line#X+1.
+This is the same situation as Line#2.
+
+Continue this procedure to the end of the text file.
+
+
+## How the decompression works
+
+Decompression performs the reverse operation, processing the lines in the same order.
+
+ - Read compressed line from the file to buffer.
+ - Decompress it to the ringbuffer.
+ - Output decompressed plain text line to the file.
+ - Forward ringbuffer offset. If offset exceeds end of the ringbuffer, reset it.
+
+Continue this procedure to the end of the compressed file.
--- a/src/lz4/examples/streaming_api_basics.md
+++ b/src/lz4/examples/streaming_api_basics.md
@@ -0,0 +1,87 @@
+# LZ4 Streaming API Basics
+by *Takayuki Matsuoka*
+## LZ4 API sets
+
+LZ4 has the following API sets :
+
+ - "Auto Framing" API (lz4frame.h) :
+   This is most recommended API for usual application.
+   It guarantees interoperability with other LZ4 framing format compliant tools/libraries
+   such as LZ4 command line utility, node-lz4, etc.
+ - "Block" API : This is recommended for simple purposes.
+   It compresses a single raw memory block to an LZ4 memory block and vice versa.
+ - "Streaming" API : This is designed for complex tasks.
+   For example, compressing huge stream data in a restricted memory environment.
+
+Basically, you should use "Auto Framing" API.
+But if you want to write advanced application, it's time to use Block or Streaming APIs.
+
+
+## What is difference between Block and Streaming API ?
+
+Block API (de)compresses a single contiguous memory block.
+In other words, the LZ4 library finds redundancy within a single contiguous memory block.
+Streaming API does the same thing but (de)compresses multiple adjacent contiguous memory blocks.
+So the LZ4 library can find more redundancy than with Block API.
+
+The following figures show the difference between the APIs and block sizes.
+In these figures, the original data is split into 4KiBytes contiguous chunks.
+
+```
+Original Data
+    +---------------+---------------+----+----+----+
+    | 4KiB Chunk A  | 4KiB Chunk B  | C  | D  |... |
+    +---------------+---------------+----+----+----+
+
+Example (1) : Block API, 4KiB Block
+    +---------------+---------------+----+----+----+
+    | 4KiB Chunk A  | 4KiB Chunk B  | C  | D  |... |
+    +---------------+---------------+----+----+----+
+    | Block #1      | Block #2      | #3 | #4 |... |
+    +---------------+---------------+----+----+----+
+    
+                    (No Dependency)
+
+
+Example (2) : Block API, 8KiB Block
+    +---------------+---------------+----+----+----+
+    | 4KiB Chunk A  | 4KiB Chunk B  | C  | D  |... |
+    +---------------+---------------+----+----+----+
+    |            Block #1           |Block #2 |... |
+    +--------------------+----------+-------+-+----+
+          ^              |             ^    |
+          |              |             |    |
+          +--------------+             +----+
+          Internal Dependency          Internal Dependency
+
+
+Example (3) : Streaming API, 4KiB Block
+    +---------------+---------------+-----+----+----+
+    | 4KiB Chunk A  | 4KiB Chunk B  | C   | D  |... |
+    +---------------+---------------+-----+----+----+
+    | Block #1      | Block #2      | #3  | #4 |... |
+    +---------------+----+----------+-+---+-+--+----+
+          ^              |   ^        | ^   |
+          |              |   |        | |   |
+          +--------------+   +--------+ +---+
+          Dependency         Dependency Dependency
+```
+
+ - In example (1), there is no dependency.
+   All blocks are compressed independently.
+ - In example (2), naturally 8KiBytes block has internal dependency.
+   But still block #1 and #2 are compressed independently.
+ - In example (3), block #2 has dependency to #1,
+   also #3 has dependency to #2 and #1, #4 has #3, #2 and #1, and so on.
+
+Here, we can observe difference between example (2) and (3).
+In (2), there's no dependency between chunk B and C, but (3) has dependency between B and C.
+This dependency improves compression ratio.
+
+
+## Restriction of Streaming API
+
+For efficiency, Streaming API doesn't keep a mirror copy of dependent (de)compressed memory.
+This means users should keep this dependent (de)compressed memory explicitly.
+Usually, "Dependent memory" is the previous adjacent contiguous memory, up to 64KiBytes.
+LZ4 will not access memory further back than that.