From fd2082df7a818b48bdf16d98a26e27be1600a68d Mon Sep 17 00:00:00 2001 From: Andrew Tolbert Date: Sun, 28 Feb 2016 23:30:43 -0600 Subject: [PATCH] Update README.md for 3.0.0-alpha3. --- README.md | 278 +++++++++++++++++------------------------------------- 1 file changed, 88 insertions(+), 190 deletions(-) diff --git a/README.md b/README.md index 70c5b14..504cbec 100644 --- a/README.md +++ b/README.md @@ -2,27 +2,34 @@ [![Build Status](https://travis-ci.org/tolbertam/sstable-tools.svg?branch=master)](https://travis-ci.org/tolbertam/sstable-tools)[ ![Download](https://api.bintray.com/packages/tolbertam/sstable-tools/sstable-tools.jar/images/download.svg) ](https://bintray.com/tolbertam/sstable-tools/sstable-tools.jar/_latestVersion) -A toolkit for parsing, creating and doing other fun stuff with Cassandra 3.x SSTables. This project is under heavy development and not yet stable. +A toolkit for parsing, creating and doing other fun stuff with Cassandra 3.x SSTables. This project is under development and not yet stable. Pre compiled binary available from bintray: -* [sstable-tools-3.0.0-alpha2.jar](https://bintray.com/artifact/download/tolbertam/sstable-tools/sstable-tools-3.0.0-alpha2.jar) - Currently tested with 3.0, 3.1, 3.1.1, 3.2.1, 3.3. +* [sstable-tools-3.0.0-alpha3.jar](https://bintray.com/artifact/download/tolbertam/sstable-tools/sstable-tools-3.0.0-alpha3.jar) - Currently tested with 3.0, 3.1, 3.1.1, 3.2.1, 3.3. Example usage: - java -jar sstable-tools-3.0.0-alpha2.jar toJson ma-2-big-Data.db - java -jar sstable-tools-3.0.0-alpha2.jar describe ma-2-big-Data.db - java -jar sstable-tools-3.0.0-alpha3.jar SELECT count(*) FROM ma-2-big-Data.db WHERE age > 30 (TODO: Escape this as it doesnt work as is.) 
- + java -jar sstable-tools.jar cqlsh + java -jar sstable-tools.jar toJson ma-2-big-Data.db + java -jar sstable-tools.jar describe ma-2-big-Data.db + Example shell usage: - java -jar sstable-tools-3.0.0-alpha2.jar cqlsh - - ## Select one or more (space delimited, or choose directory to include all) + java -jar sstable-tools.jar cqlsh + + ## Select one or more sstables (space delimited, or choose directory to include all) cqlsh> use ma-2-big-Data.db; - - ## Use create table statement to set the schema (can view with 'describe schema'). This is optional but without - ## it the partition key and clustering index names are unknown. + Using: /home/user/sstable-tools/ma-2-big-Data.db + + ## Use predefined schema file. + ## Can view with 'schema'. + ## This is optional but without it the partition + ## key and clustering index names are unknown. + cqlsh> schema schema.cql + Successfully imported schema from '/home/user/sstable-tools/schema.cql'. + + ## Alternatively, use 'CREATE TABLE' statement to enter a schema. cqlsh> CREATE TABLE users ( ... user_name varchar PRIMARY KEY, ... password varchar, @@ -30,23 +37,25 @@ Example shell usage: ... state varchar, ... birth_year bigint ... 
); - + ## Discover the data in sstable(s) using CQL queries cqlsh> SELECT * FROM sstable WHERE age > 1 LIMIT 1 - + ┌────────────┬─────────────┬─────────┬───────────┬────────┐ │user_name │birth_year │gender │password │state │ ╞════════════╪═════════════╪═════════╪═══════════╪════════╡ │frodo │1985 │male │pass@ │CA │ └────────────┴─────────────┴─────────┴───────────┴────────┘ - - ## Display raw sstable data (useful to see tombstones and expired ttls) with optional where clause + + ## Display raw sstable data (useful to see tombstones and expired ttls) + ## with optional where clause cqlsh> DUMP WHERE age > 1 LIMIT 1 + [frodo] Row[info=[ts=1455937221199050] ]: | [birth_year=1985 ts=1455937221199050], [gender=male ts=1455937221199050], [password=pass@ ts=1455937221199050], [state=CA ts=1455937221199050] - + ## Describe the sstable data and metadata cqlsh> describe sstables; - + /Users/clohfink/git/sstable-tools/./src/test/resources/ma-2-big-Data.db ======================================================================= Partitions: 1 @@ -60,7 +69,7 @@ Example shell usage: Tombstone Leaders: Partitioner: org.apache.cassandra.dht.Murmur3Partitioner Bloom Filter FP chance: 0.010000 - Size: 50 (50 B) + Size: 50 (50 B) Compressor: org.apache.cassandra.io.compress.LZ4Compressor Compression ratio: -1.0 Minimum timestamp: 1455937221199050 (02/19/2016 21:00:21) @@ -69,19 +78,39 @@ Example shell usage: SSTable max local deletion time: 2147483647 (01/18/2038 21:14:07) TTL min: 0 (0 milliseconds) ...[snip]... - ->>>>>>> update readme a little -**Note:** No environment configuration is necessary for this tool to work if all components of the sstable are available but the cql create statement allows for more details. 
- -**Note:** CQL statements require bash escaping when using the "select" command via command line ```java -jar sstable-tools.jar select \* from \"path\"``` + ## Paging is enabled by default and can be manipulated by using 'PAGING' + cqlsh> PAGING 20; + Now Query paging is enabled + Page size: 20 + cqlsh> PAGING OFF; + Disabled Query paging. + cqlsh> PAGING ON; + Now Query paging is enabled + Page size: 20 + + ## Used sstables, schema, and paging settings are persisted for future use. + ## Use the 'PERSIST' command to view preferences and to enable/disable + ## persistence. + cqlsh> PERSIST; + Preferences are currently enabled: + pagingEnabled=true + pagingSize=20 + preferencesEnabled=true + schema="" + sstables=[ + "/home/user/sstable-tools/ma-2-big-Data.db" + ] + cqlsh> PERSIST OFF; + Disabled Preferences. -see more below +**Note:** No environment configuration is necessary for this tool to work if all components of the sstable are available but the cql create statement allows for more details. **Features:** +* [cqlsh](#cqlsh) - Drop into an interactive shell to make queries against SSTables. +* [describe](#describe) - Describe SSTable data and metadata. * [sstable2json](#sstable2json) - Utility for exporting C\* 3.X SSTables into JSON. -* [select](#select) - Make CQL queries against SSTables ## Building @@ -95,36 +124,58 @@ mvn package The executable jar will be present in the target directory. ## cqlsh -cql shell similiar and modeled after the C* cqlsh tool. This will allow issuing cql queries against raw sstables and -providing additional diagnostic tools over them. Provides history (reverse searchable with ctrl-r) for ease of use. +cql shell similar to and modeled after the C* cqlsh tool. Enables issuing cql queries against raw sstables and +provides additional diagnostic tools over them. Provides history (reverse searchable with ctrl-r) for ease of use. 
```text -EXIT - leaves the shell (also ctrl-d on prompt, ctrl-c to break back to blank prompt) -CREATE TABLE ... - A CREATE TABLE cql statement to use as metadata when reading sstables (HIGHLY RECOMMENDED!) -DESCRIBE SCHEMA - Show currently used schema (or serialized cfmetadata if generated) -DESCRIBE SSTABLES - Provide details and statistics on current sstable(s) -USE - update the sstable[s] used by default with select, dump, describe commands +Commands: + +HELP - prints this message +EXIT - leaves the shell +CREATE TABLE ... - A CREATE TABLE cql statement to use as metadata when reading sstables (HIGHLY RECOMMENDED!) +DESCRIBE SCHEMA - Show currently used schema (or serialized cfmetadata if generated) +DESCRIBE SSTABLES - Provide details and statistics on current sstable(s) +PAGING [(ON|OFF)] - Enables, disables, or shows current status of query paging. +PAGING <SIZE> - Enables paging and sets paging size. +PERSIST [(ON|OFF)] - Enables, disables, or shows current status of persistence of settings state. +SCHEMA [<FILE>] - Imports a cql file as the active table schema or shows active user-defined schema. +USE - update the sstable[s] used by default with select, dump, describe commands USE /var/lib/cassandra/data/system/peers/ma-1-big-Data.db or with multiple sstables separated with spaces. This can also be a directory which will add all sstables in it. USE ma-1-big-Data.db ma-2-big-Data.db "/home/path with space/db/data/sstables" -SELECT - run a cql query against the current sstable (unless other specified) +SELECT - run a cql query against the current sstable (unless otherwise specified) SELECT * FROM sstables WHERE key > 1 LIMIT 10 the keyword sstables will use the current sstable set with the USE command or set when running cqlsh. You can also specify an sstable here SELECT avg(someColumn) FROM /var/lib/cassandra/data/system/peers/ma-1-big-Data.db WHERE key > 1 -DUMP - dump the raw unfiltered partitions/rows. Useful for debuging TTLed/tombstoned data. 
+DUMP - dump the raw unfiltered partitions/rows. Useful for debugging TTLed/tombstoned data. DUMP; Can also specify a where clause for filtering the results. DUMP WHERE partitionKey = 'OpsCenter'; ``` +## describe + +Provides information about an sstable's data and its metadata. + +Example Output: + +``` +``` + +### Usage + +``` +java -jar sstable-tools.jar describe /path/to/file.db +``` + ## sstable2json sstable2json is a utility in the spirit of the [original sstable2json](https://docs.datastax.com/en/cassandra/1.2/cassandra/tools/toolsSstable2JsonUtilsTOC.html) -which has since been deprecated ([CASSANDRA-9618](https://issues.apache.org/jira/browse/CASSANDRA-9618)) -and has since been entirely removed with plans to add a replacement ([CASSANDRA-7464](https://issues.apache.org/jira/browse/CASSANDRA-7464)). +which was previously deprecated ([CASSANDRA-9618](https://issues.apache.org/jira/browse/CASSANDRA-9618)). +This functionality was merged into Cassandra by [CASSANDRA-7464](https://issues.apache.org/jira/browse/CASSANDRA-7464) and is to be released in Cassandra 3.0.4 and 3.4. This will likely be removed from sstable-tools in a future release. A key differentiator between the storage format between older verisons of Cassandra and Cassandra 3.0 is that an SSTable was previously a representation @@ -217,159 +268,6 @@ which represent deletes. In Cassandra 3.0, users can now delete ranges of rows ([CASSANDRA-6237](https://issues.apache.org/jira/browse/CASSANDRA-6237)) which creates range tombstones. - ```json TODO: update with loss of cql create option ``` - -## select - -Use the CQL parser to query the sstables directly without Cassandra or any configuration. Does not currently support ORDER_BY and DISTINCT but all other features should work. It will search the classpath for a ```schema.cql``` (override with ```-Dsstabletools.schema=...```) that contains the CQL ```CREATE TABLE``` statement for the schema. 
If it cannot find one it will fall back to a best guess from the sstable metadata. The data is dumped as a result set in a data table, but to see the raw data set the ```-Dquery.toJson``` for the alternative output. - -**WARNING:** without the schema the queries become difficult for any partition/clustering columns as their names are not included in meta data yet (CASSANDRA-9587) - -### Usage - -``` -java -jar sstable-tools.jar SELECT ... - -usage: SELECT FROM WHERE -``` - - -### Examples - -With a schema.cql file: -```CQL -CREATE TABLE mykeyspace.users ( - user_name varchar PRIMARY KEY, - password varchar, - gender varchar, - state varchar, - birth_year bigint -); -``` - -An example that selects users by birth year and state - -```json -java -jar sstable-tools.jar SELECT * FROM ma-2-big-Data.db WHERE birth_year >= 1985 AND state = 'CA' - - ┌────────────┬─────────────┬─────────┬───────────┬────────┐ - │user_name │birth_year │gender │password │state │ - ╞════════════╪═════════════╪═════════╪═══════════╪════════╡ - │frodo │1985 │male │pass@ │CA │ - └────────────┴─────────────┴─────────┴───────────┴────────┘ -``` - -Another example is given the table -```cql -CREATE TABLE IF NOT EXISTS test.wide ( key text, key2 text, val text, PRIMARY KEY (key, key2)); -``` -With four partitions, each with 9 rows, key2 1-9 all with the val = "X" -``` -java -jar sstable-tools.jar SELECT * FROM ma-3-big-Data.db - ┌──────┬───────┬──────┐ - │key │key2 │val │ - ╞══════╪═══════╪══════╡ - │4 │1 │X │ - ├──────┼───────┼──────┤ - │4 │2 │X │ - ├──────┼───────┼──────┤ - │4 │3 │X │ - ├──────┼───────┼──────┤ - │4 │4 │X │ - ├──────┼───────┼──────┤ -... 
- - │1 │7 │X │ - ├──────┼───────┼──────┤ - │1 │8 │X │ - ├──────┼───────┼──────┤ - │1 │9 │X │ - └──────┴───────┴──────┘ -``` -You can perform aggregates - -``` -java -jar sstable-tools.jar SELECT count(*) FROM ma-3-big-Data.db - ┌────────┐ - │count │ - ╞════════╡ - │36 │ - └────────┘ - - java -jar sstable-tools.jar SELECT min(key2), max(key2) FROM ma-3-big-Data.db - ┌───────────────────┬───────────────────┐ - │system.min(key2) │system.max(key2) │ - ╞═══════════════════╪═══════════════════╡ - │1 │9 │ - └───────────────────┴───────────────────┘ -``` - -To see unfiltered data (useful for tombstone debugging) use the raw json format -``` -java -jar -Dquery.toJson=true sstable-tools.jar SELECT * FROM ma-3-big-Data.db WHERE key2 = '1' -[ - { - "partition" : { - "key" : [ "4" ] - }, - "rows" : [ - { - "type" : "row", - "clustering" : [ "1" ], - "liveness_info" : { "tstamp" : 1456111364877667 }, - "cells" : [ - { "name" : "val", "value" : "X" } - ] - } - ] - }, - { - "partition" : { - "key" : [ "3" ] - }, - "rows" : [ - { - "type" : "row", - "clustering" : [ "1" ], - "liveness_info" : { "tstamp" : 1456111364856446 }, - "cells" : [ - { "name" : "val", "value" : "X" } - ] - } - ] - }, - { - "partition" : { - "key" : [ "2" ] - }, - "rows" : [ - { - "type" : "row", - "clustering" : [ "1" ], - "liveness_info" : { "tstamp" : 1456111364834000 }, - "cells" : [ - { "name" : "val", "value" : "X" } - ] - } - ] - }, - { - "partition" : { - "key" : [ "1" ] - }, - "rows" : [ - { - "type" : "row", - "clustering" : [ "1" ], - "liveness_info" : { "tstamp" : 1456111364803946 }, - "cells" : [ - { "name" : "val", "value" : "X" } - ] - } - ] - } -] -```