Skip to content

Commit 4ec960a

Browse files
AliSQLAliSQL
authored andcommitted
[feature] [tokudb] Issue: #63 Add compression ratio statistics per FT
TokuDB has status variables that expose the compression ratio: mysql> show global status like 'tokudb%COMPRESSION_RATIO'; +---------------------------------------+----------+ | Variable_name | Value | +---------------------------------------+----------+ | Tokudb_LEAF_NODE_COMPRESSION_RATIO | 5.850260 | | Tokudb_NONLEAF_NODE_COMPRESSION_RATIO | 0.564453 | | Tokudb_OVERALL_NODE_COMPRESSION_RATIO | 4.528809 | +---------------------------------------+----------+ but this is a global-level ratio, and sometimes users may be interested in the table-level compression ratio. This patch addresses this request by exposing the FT-level compression ratio through the information_schema.TokuDB_fractal_tree_info table. We add two columns, leaf_compress_ratio and internal_compress_ratio, which indicate the compression ratio of leaf nodes and internal nodes respectively. Limitations: the compression ratio information is maintained in memory, and will be lost when the server is restarted or the FT gets evicted from memory. Maybe in the future this information will be persisted in the FT file, when this feature is accepted by PerconaFT upstream.
1 parent 349aa36 commit 4ec960a

File tree

11 files changed

+118
-3
lines changed

11 files changed

+118
-3
lines changed

mysql-test/suite/funcs_1/r/is_columns_is.result

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -350,7 +350,9 @@ def information_schema TokuDB_fractal_tree_info bt_num_blocks_in_use 4 0 NO bigi
350350
def information_schema TokuDB_fractal_tree_info bt_size_allocated 5 0 NO bigint NULL NULL 19 0 NULL NULL NULL bigint(0) select
351351
def information_schema TokuDB_fractal_tree_info bt_size_in_use 6 0 NO bigint NULL NULL 19 0 NULL NULL NULL bigint(0) select
352352
def information_schema TokuDB_fractal_tree_info dictionary_name 1 NO varchar 256 768 NULL NULL NULL utf8 utf8_general_ci varchar(256) select
353+
def information_schema TokuDB_fractal_tree_info internal_compress_ratio 11 0 NO double NULL NULL 12 NULL NULL NULL NULL double select
353354
def information_schema TokuDB_fractal_tree_info internal_file_name 2 NO varchar 256 768 NULL NULL NULL utf8 utf8_general_ci varchar(256) select
355+
def information_schema TokuDB_fractal_tree_info leaf_compress_ratio 10 0 NO double NULL NULL 12 NULL NULL NULL NULL double select
354356
def information_schema TokuDB_fractal_tree_info table_dictionary_name 9 NO varchar 256 768 NULL NULL NULL utf8 utf8_general_ci varchar(256) select
355357
def information_schema TokuDB_fractal_tree_info table_name 8 NO varchar 256 768 NULL NULL NULL utf8 utf8_general_ci varchar(256) select
356358
def information_schema TokuDB_fractal_tree_info table_schema 7 NO varchar 256 768 NULL NULL NULL utf8 utf8_general_ci varchar(256) select
@@ -449,6 +451,7 @@ ORDER BY CHARACTER_SET_NAME, COLLATION_NAME, COL_CML;
449451
COL_CML DATA_TYPE CHARACTER_SET_NAME COLLATION_NAME
450452
NULL bigint NULL NULL
451453
NULL datetime NULL NULL
454+
NULL double NULL NULL
452455
NULL int NULL NULL
453456
NULL tinyint NULL NULL
454457
--> CHAR(0) is allowed (see manual), and here both CHARACHTER_* values
@@ -819,6 +822,8 @@ NULL information_schema TokuDB_fractal_tree_info bt_size_in_use bigint NULL NULL
819822
3.0000 information_schema TokuDB_fractal_tree_info table_schema varchar 256 768 utf8 utf8_general_ci varchar(256)
820823
3.0000 information_schema TokuDB_fractal_tree_info table_name varchar 256 768 utf8 utf8_general_ci varchar(256)
821824
3.0000 information_schema TokuDB_fractal_tree_info table_dictionary_name varchar 256 768 utf8 utf8_general_ci varchar(256)
825+
NULL information_schema TokuDB_fractal_tree_info leaf_compress_ratio double NULL NULL NULL NULL double
826+
NULL information_schema TokuDB_fractal_tree_info internal_compress_ratio double NULL NULL NULL NULL double
822827
NULL information_schema TokuDB_locks locks_trx_id bigint NULL NULL NULL NULL bigint(0)
823828
NULL information_schema TokuDB_locks locks_mysql_thread_id bigint NULL NULL NULL NULL bigint(0)
824829
3.0000 information_schema TokuDB_locks locks_dname varchar 256 768 utf8 utf8_general_ci varchar(256)
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
create table t1(c1 int) engine = tokudb;
2+
create table t2(c1 int) engine = tokudb;
3+
insert into t1 values (rand());
4+
select count(*) from t1;
5+
count(*)
6+
3145726
7+
select count(*) from t2;
8+
count(*)
9+
6291410
10+
set global tokudb_checkpoint_on_flush_logs=ON;
11+
flush logs;
12+
set global tokudb_checkpoint_on_flush_logs=OFF;
13+
select leaf_compress_ratio > 3, internal_compress_ratio > 1 from information_schema.TokuDB_fractal_tree_info where table_dictionary_name = 'main';
14+
leaf_compress_ratio > 3 internal_compress_ratio > 1
15+
1 0
16+
1 0
17+
flush tables;
18+
set global tokudb_checkpoint_on_flush_logs=ON;
19+
flush logs;
20+
set global tokudb_checkpoint_on_flush_logs=OFF;
21+
select leaf_compress_ratio > 3, internal_compress_ratio > 1 from information_schema.TokuDB_fractal_tree_info where table_dictionary_name = 'main';
22+
leaf_compress_ratio > 3 internal_compress_ratio > 1
23+
1 0
24+
1 0
25+
drop table t1;
26+
drop table t2;
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
--source include/have_tokudb.inc
2+
--source include/have_innodb.inc
3+
4+
create table t1(c1 int) engine = tokudb;
5+
create table t2(c1 int) engine = tokudb;
6+
7+
insert into t1 values (rand());
8+
9+
--let $i=0
10+
--let $count=20
11+
--disable_query_log
12+
while ($i<$count)
13+
{
14+
inc $i;
15+
eval insert into t1 values (rand());
16+
eval insert into t1 select * from t1;
17+
eval insert into t2 select * from t1;
18+
}
19+
--enable_query_log
20+
21+
select count(*) from t1;
22+
select count(*) from t2;
23+
24+
# trigger checkpoint
25+
set global tokudb_checkpoint_on_flush_logs=ON;
26+
flush logs;
27+
set global tokudb_checkpoint_on_flush_logs=OFF;
28+
29+
select leaf_compress_ratio > 3, internal_compress_ratio > 1 from information_schema.TokuDB_fractal_tree_info where table_dictionary_name = 'main';
30+
flush tables;
31+
32+
set global tokudb_checkpoint_on_flush_logs=ON;
33+
flush logs;
34+
set global tokudb_checkpoint_on_flush_logs=OFF;
35+
36+
select leaf_compress_ratio > 3, internal_compress_ratio > 1 from information_schema.TokuDB_fractal_tree_info where table_dictionary_name = 'main';
37+
38+
drop table t1;
39+
drop table t2;

storage/tokudb/ft-index/buildheader/make_tdb.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -553,7 +553,7 @@ static void print_db_struct (void) {
553553
"int (*verify_with_progress)(DB *, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra, int verbose, int keep_going)",
554554
"int (*update)(DB *, DB_TXN*, const DBT *key, const DBT *extra, uint32_t flags)",
555555
"int (*update_broadcast)(DB *, DB_TXN*, const DBT *extra, uint32_t flags)",
556-
"int (*get_fractal_tree_info64)(DB*,uint64_t*,uint64_t*,uint64_t*,uint64_t*)",
556+
"int (*get_fractal_tree_info64)(DB*,uint64_t*,uint64_t*,uint64_t*,uint64_t*,double*,double*)",
557557
"int (*iterate_fractal_tree_block_map)(DB*,int(*)(uint64_t,int64_t,int64_t,int64_t,int64_t,void*),void*)",
558558
"const char *(*get_dname)(DB *db)",
559559
"int (*get_last_key)(DB *db, YDB_CALLBACK_FUNCTION func, void* extra)",

storage/tokudb/ft-index/ft/ft-internal.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,12 @@ struct ft {
264264
// - if the score is high enough, we optimistically attempt to insert directly into the rightmost leaf
265265
// - if our attempt fails because the key was not in range of the rightmost leaf, we reset the score back to 0
266266
uint32_t seqinsert_score;
267+
268+
// counters for uncompressed/compressed size of leaf/internal nodes
269+
unsigned long long leaf_uncompressed_bytes;
270+
unsigned long long leaf_compressed_bytes;
271+
unsigned long long internal_uncompressed_bytes;
272+
unsigned long long internal_compressed_bytes;
267273
};
268274

269275
// Allocate a DB struct off the stack and only set its comparison
@@ -627,6 +633,7 @@ typedef struct {
627633

628634
void toku_ft_status_update_pivot_fetch_reason(ftnode_fetch_extra *bfe);
629635
void toku_ft_status_update_flush_reason(FTNODE node, uint64_t uncompressed_bytes_flushed, uint64_t bytes_written, tokutime_t write_time, bool for_checkpoint);
636+
void toku_ft_status_update_flush_for_ft(FTNODE node, FT ft, uint64_t uncompressed_bytes_flushed, uint64_t bytes_written);
630637
void toku_ft_status_update_serialize_times(FTNODE node, tokutime_t serialize_time, tokutime_t compress_time);
631638
void toku_ft_status_update_deserialize_times(FTNODE node, tokutime_t deserialize_time, tokutime_t decompress_time);
632639
void toku_ft_status_note_msn_discard(void);

storage/tokudb/ft-index/ft/ft-ops.cc

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -799,6 +799,17 @@ void toku_ft_status_update_flush_reason(FTNODE node,
799799
}
800800
}
801801

802+
// Accumulate per-FT compression statistics for a node that has just been
// written out.
//
//   node                       - the node that was written; height == 0 selects
//                                the leaf counters, anything else the internal
//                                (non-leaf) counters
//   ft                         - the fractal tree whose counters are updated
//   uncompressed_bytes_flushed - added to the *_uncompressed_bytes counter
//   bytes_written              - added to the *_compressed_bytes counter
//
// NOTE(review): this is called from the node serialization path, which may run
// on several threads at once for different nodes of the same FT (checkpoint,
// eviction, client-driven flushes) — confirm. A plain `+=` is a non-atomic
// read-modify-write and can lose updates under that concurrency, so the
// counters are bumped with atomic adds instead.
void toku_ft_status_update_flush_for_ft(FTNODE node, FT ft, uint64_t uncompressed_bytes_flushed, uint64_t bytes_written)
{
    if (node->height == 0) {
        (void) __sync_fetch_and_add(&ft->leaf_uncompressed_bytes, uncompressed_bytes_flushed);
        (void) __sync_fetch_and_add(&ft->leaf_compressed_bytes, bytes_written);
    } else {
        (void) __sync_fetch_and_add(&ft->internal_uncompressed_bytes, uncompressed_bytes_flushed);
        (void) __sync_fetch_and_add(&ft->internal_compressed_bytes, bytes_written);
    }
}

812+
802813
void toku_ftnode_checkpoint_complete_callback(void *value_data) {
803814
FTNODE node = static_cast<FTNODE>(value_data);
804815
if (node->height > 0) {

storage/tokudb/ft-index/ft/ft-ops.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,8 @@ struct ftinfo64 {
292292
uint64_t num_blocks_in_use; // number of blocks in use by most recent checkpoint
293293
uint64_t size_allocated; // sum of sizes of blocks in blocktable
294294
uint64_t size_in_use; // sum of sizes of blocks in use by most recent checkpoint
295+
double leaf_ratio; // compress ratio for leaf node
296+
double internal_ratio; // compress ratio for internal node
295297
};
296298

297299
void toku_ft_handle_get_fractal_tree_info64(FT_HANDLE, struct ftinfo64 *);

storage/tokudb/ft-index/ft/ft.cc

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -409,6 +409,11 @@ static void ft_init(FT ft, FT_OPTIONS options, CACHEFILE cf) {
409409
ft_note_unpin_by_checkpoint);
410410

411411
ft->blocktable.verify_no_free_blocknums();
412+
413+
ft->leaf_uncompressed_bytes = 0;
414+
ft->leaf_compressed_bytes = 0;
415+
ft->internal_uncompressed_bytes = 0;
416+
ft->internal_compressed_bytes = 0;
412417
}
413418

414419

@@ -492,6 +497,10 @@ int toku_read_ft_and_store_in_cachefile (FT_HANDLE ft_handle, CACHEFILE cf, LSN
492497
ft->cmp.create(ft_handle->options.compare_fun, &ft->cmp_descriptor, ft_handle->options.memcmp_magic);
493498
ft->update_fun = ft_handle->options.update_fun;
494499
ft->cf = cf;
500+
ft->leaf_uncompressed_bytes = 0;
501+
ft->leaf_compressed_bytes = 0;
502+
ft->internal_uncompressed_bytes = 0;
503+
ft->internal_compressed_bytes = 0;
495504
toku_cachefile_set_userdata(cf,
496505
reinterpret_cast<void *>(ft),
497506
ft_log_fassociate_during_checkpoint,
@@ -878,6 +887,10 @@ toku_ft_stat64 (FT ft, struct ftstat64_s *s) {
878887

879888
// Ratio of uncompressed to compressed bytes. Yields 0 while the compressed
// counter is still zero, so the division never sees a zero denominator.
static double ft_compress_ratio(unsigned long long uncompressed_bytes, unsigned long long compressed_bytes) {
    if (compressed_bytes == 0) {
        return 0;
    }
    return (double)uncompressed_bytes / (double)compressed_bytes;
}

// Fill *info for the given FT: the block table supplies its own fields via
// get_info64(), then the leaf and internal compression ratios are derived
// from the FT's in-memory byte counters.
void toku_ft_get_fractal_tree_info64(FT ft, struct ftinfo64 *info) {
    ft->blocktable.get_info64(info);
    info->leaf_ratio = ft_compress_ratio(ft->leaf_uncompressed_bytes,
                                         ft->leaf_compressed_bytes);
    info->internal_ratio = ft_compress_ratio(ft->internal_uncompressed_bytes,
                                             ft->internal_compressed_bytes);
}
882895

883896
int toku_ft_iterate_fractal_tree_block_map(FT ft, int (*iter)(uint64_t,int64_t,int64_t,int64_t,int64_t,void*), void *iter_extra) {

storage/tokudb/ft-index/ft/serialize/ft_node-serialize.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -878,6 +878,7 @@ toku_serialize_ftnode_to (int fd, BLOCKNUM blocknum, FTNODE node, FTNODE_DISK_DA
878878

879879
tokutime_t io_time = t1 - t0;
880880
toku_ft_status_update_flush_reason(node, n_uncompressed_bytes, n_to_write, io_time, for_checkpoint);
881+
toku_ft_status_update_flush_for_ft(node, ft, n_uncompressed_bytes, n_to_write);
881882

882883
toku_free(compressed_buf);
883884
node->dirty = 0; // See #1957. Must set the node to be clean after serializing it so that it doesn't get written again on the next checkpoint or eviction.

storage/tokudb/ft-index/src/ydb_db.cc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -733,14 +733,16 @@ toku_db_set_memcmp_magic(DB *db, uint8_t magic) {
733733
}
734734

735735
// DB-level accessor for fractal tree information (diff hunk: the first
// signature below is the removed one, the second is the added one).
// Fetches a struct ftinfo64 for db->i->ft_handle and copies each field into
// the matching out-parameter: the four block-table figures plus, in the new
// signature, the leaf and internal compression ratios. Every out-pointer is
// dereferenced unconditionally, so callers must pass non-NULL pointers.
// Returns 0 on the path visible here.
// NOTE(review): HANDLE_PANICKED_DB is a macro defined elsewhere; presumably it
// returns early when the environment has panicked — confirm.
static int
736-
toku_db_get_fractal_tree_info64(DB *db, uint64_t *num_blocks_allocated, uint64_t *num_blocks_in_use, uint64_t *size_allocated, uint64_t *size_in_use) {
736+
toku_db_get_fractal_tree_info64(DB *db, uint64_t *num_blocks_allocated, uint64_t *num_blocks_in_use, uint64_t *size_allocated, uint64_t *size_in_use, double *leaf_compress_ratio, double *internal_compress_ratio) {
737737
HANDLE_PANICKED_DB(db);
738738
struct ftinfo64 ftinfo;
739739
toku_ft_handle_get_fractal_tree_info64(db->i->ft_handle, &ftinfo);
740740
*num_blocks_allocated = ftinfo.num_blocks_allocated;
741741
*num_blocks_in_use = ftinfo.num_blocks_in_use;
742742
*size_allocated = ftinfo.size_allocated;
743743
*size_in_use = ftinfo.size_in_use;
744+
*leaf_compress_ratio = ftinfo.leaf_ratio;
745+
*internal_compress_ratio = ftinfo.internal_ratio;
744746
return 0;
745747
}
746748

0 commit comments

Comments
 (0)