Skip to content

Commit

Permalink
Crypto Function Migration (apache#12840)
Browse files Browse the repository at this point in the history
* Update crypto docs

* delete old

* fmt checks
  • Loading branch information
jonathanc-n authored Oct 10, 2024
1 parent f7591fb commit d5d9d30
Show file tree
Hide file tree
Showing 8 changed files with 332 additions and 89 deletions.
48 changes: 47 additions & 1 deletion datafusion/functions/src/crypto/digest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,12 @@
use super::basic::{digest, utf8_or_binary_to_binary_type};
use arrow::datatypes::DataType;
use datafusion_common::Result;
use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING;
use datafusion_expr::{
ColumnarValue, ScalarUDFImpl, Signature, TypeSignature::*, Volatility,
ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature::*, Volatility,
};
use std::any::Any;
use std::sync::OnceLock;

#[derive(Debug)]
pub struct DigestFunc {
Expand Down Expand Up @@ -69,4 +71,48 @@ impl ScalarUDFImpl for DigestFunc {
/// Evaluates the function by forwarding `args` unchanged to the `digest`
/// helper imported from `super::basic`, returning its result as-is.
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
digest(args)
}

/// Returns the documentation for this function.
///
/// Always `Some`: the value comes from `get_digest_doc`, which hands out a
/// `'static` reference.
fn documentation(&self) -> Option<&Documentation> {
Some(get_digest_doc())
}
}

/// Cache holding the `digest` documentation once it has been built.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the documentation for the `digest` function.
///
/// The value is constructed through `OnceLock::get_or_init`, so the builder
/// closure runs at most once and every call receives the same reference.
///
/// # Panics
///
/// Panics if the builder's `build()` result cannot be unwrapped.
fn get_digest_doc() -> &'static Documentation {
    DOCUMENTATION.get_or_init(|| {
        // Literals are kept at column 0 on continuation lines because every
        // character, including leading whitespace, is part of the text.
        let sql_example = r#"```sql
> select digest('foo', 'sha256');
+------------------------------------------+
| digest(Utf8("foo"), Utf8("sha256")) |
+------------------------------------------+
| <binary_hash_result> |
+------------------------------------------+
```"#;
        let algorithm_help = "String expression specifying algorithm to use. Must be one of:
- md5
- sha224
- sha256
- sha384
- sha512
- blake2s
- blake2b
- blake3";

        let builder = Documentation::builder()
            .with_doc_section(DOC_SECTION_HASHING)
            .with_description(
                "Computes the binary hash of an expression using the specified algorithm.",
            )
            .with_syntax_example("digest(expression, algorithm)")
            .with_sql_example(sql_example)
            .with_standard_argument("expression", "String")
            .with_argument("algorithm", algorithm_help);

        builder.build().unwrap()
    })
}
34 changes: 33 additions & 1 deletion datafusion/functions/src/crypto/md5.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,12 @@
use crate::crypto::basic::md5;
use arrow::datatypes::DataType;
use datafusion_common::{plan_err, Result};
use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
use std::any::Any;
use std::sync::OnceLock;

#[derive(Debug)]
pub struct Md5Func {
Expand Down Expand Up @@ -84,4 +88,32 @@ impl ScalarUDFImpl for Md5Func {
/// Evaluates the function by forwarding `args` unchanged to the `md5`
/// helper imported from `crate::crypto::basic`, returning its result as-is.
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
md5(args)
}

/// Returns the documentation for this function.
///
/// Always `Some`: the value comes from `get_md5_doc`, which hands out a
/// `'static` reference.
fn documentation(&self) -> Option<&Documentation> {
Some(get_md5_doc())
}
}

/// Cache holding the `md5` documentation once it has been built.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the documentation for the `md5` function.
///
/// The value is constructed through `OnceLock::get_or_init`, so the builder
/// closure runs at most once and every call receives the same reference.
///
/// # Panics
///
/// Panics if the builder's `build()` result cannot be unwrapped.
fn get_md5_doc() -> &'static Documentation {
    DOCUMENTATION.get_or_init(|| {
        // Continuation lines stay at column 0: the raw string is emitted verbatim.
        let sql_example = r#"```sql
> select md5('foo');
+-------------------------------------+
| md5(Utf8("foo")) |
+-------------------------------------+
| <md5_checksum_result> |
+-------------------------------------+
```"#;

        let builder = Documentation::builder()
            .with_doc_section(DOC_SECTION_HASHING)
            .with_description("Computes an MD5 128-bit checksum for a string expression.")
            .with_syntax_example("md5(expression)")
            .with_sql_example(sql_example)
            .with_standard_argument("expression", "String");

        builder.build().unwrap()
    })
}
10 changes: 10 additions & 0 deletions datafusion/functions/src/crypto/sha224.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,16 @@ fn get_sha224_doc() -> &'static Documentation {
.with_doc_section(DOC_SECTION_HASHING)
.with_description("Computes the SHA-224 hash of a binary string.")
.with_syntax_example("sha224(expression)")
.with_sql_example(
r#"```sql
> select sha224('foo');
+------------------------------------------+
| sha224(Utf8("foo")) |
+------------------------------------------+
| <sha224_hash_result> |
+------------------------------------------+
```"#,
)
.with_standard_argument("expression", "String")
.build()
.unwrap()
Expand Down
35 changes: 34 additions & 1 deletion datafusion/functions/src/crypto/sha256.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,12 @@
use super::basic::{sha256, utf8_or_binary_to_binary_type};
use arrow::datatypes::DataType;
use datafusion_common::Result;
use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
use std::any::Any;
use std::sync::OnceLock;

#[derive(Debug)]
pub struct SHA256Func {
Expand Down Expand Up @@ -60,7 +64,36 @@ impl ScalarUDFImpl for SHA256Func {
/// Derives the return type from the type of the first argument by calling
/// `utf8_or_binary_to_binary_type`, passing this function's name along.
///
/// Indexes `arg_types[0]` directly, so an empty slice would panic.
/// NOTE(review): presumably the signature guarantees one argument — confirm.
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
utf8_or_binary_to_binary_type(&arg_types[0], self.name())
}

/// Evaluates the function by forwarding `args` unchanged to the `sha256`
/// helper imported from `super::basic`, returning its result as-is.
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
sha256(args)
}

/// Returns the documentation for this function.
///
/// Always `Some`: the value comes from `get_sha256_doc`, which hands out a
/// `'static` reference.
fn documentation(&self) -> Option<&Documentation> {
Some(get_sha256_doc())
}
}

/// Cache holding the `sha256` documentation once it has been built.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the documentation for the `sha256` function.
///
/// The value is constructed through `OnceLock::get_or_init`, so the builder
/// closure runs at most once and every call receives the same reference.
///
/// # Panics
///
/// Panics if the builder's `build()` result cannot be unwrapped.
fn get_sha256_doc() -> &'static Documentation {
    DOCUMENTATION.get_or_init(|| {
        // Continuation lines stay at column 0: the raw string is emitted verbatim.
        let sql_example = r#"```sql
> select sha256('foo');
+--------------------------------------+
| sha256(Utf8("foo")) |
+--------------------------------------+
| <sha256_hash_result> |
+--------------------------------------+
```"#;

        let builder = Documentation::builder()
            .with_doc_section(DOC_SECTION_HASHING)
            .with_description("Computes the SHA-256 hash of a binary string.")
            .with_syntax_example("sha256(expression)")
            .with_sql_example(sql_example)
            .with_standard_argument("expression", "String");

        builder.build().unwrap()
    })
}
35 changes: 34 additions & 1 deletion datafusion/functions/src/crypto/sha384.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,12 @@
use super::basic::{sha384, utf8_or_binary_to_binary_type};
use arrow::datatypes::DataType;
use datafusion_common::Result;
use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
use std::any::Any;
use std::sync::OnceLock;

#[derive(Debug)]
pub struct SHA384Func {
Expand Down Expand Up @@ -60,7 +64,36 @@ impl ScalarUDFImpl for SHA384Func {
/// Derives the return type from the type of the first argument by calling
/// `utf8_or_binary_to_binary_type`, passing this function's name along.
///
/// Indexes `arg_types[0]` directly, so an empty slice would panic.
/// NOTE(review): presumably the signature guarantees one argument — confirm.
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
utf8_or_binary_to_binary_type(&arg_types[0], self.name())
}

/// Evaluates the function by forwarding `args` unchanged to the `sha384`
/// helper imported from `super::basic`, returning its result as-is.
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
sha384(args)
}

/// Returns the documentation for this function.
///
/// Always `Some`: the value comes from `get_sha384_doc`, which hands out a
/// `'static` reference.
fn documentation(&self) -> Option<&Documentation> {
Some(get_sha384_doc())
}
}

/// Cache holding the `sha384` documentation once it has been built.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the documentation for the `sha384` function.
///
/// The value is constructed through `OnceLock::get_or_init`, so the builder
/// closure runs at most once and every call receives the same reference.
///
/// # Panics
///
/// Panics if the builder's `build()` result cannot be unwrapped.
fn get_sha384_doc() -> &'static Documentation {
    DOCUMENTATION.get_or_init(|| {
        // Continuation lines stay at column 0: the raw string is emitted verbatim.
        let sql_example = r#"```sql
> select sha384('foo');
+-----------------------------------------+
| sha384(Utf8("foo")) |
+-----------------------------------------+
| <sha384_hash_result> |
+-----------------------------------------+
```"#;

        let builder = Documentation::builder()
            .with_doc_section(DOC_SECTION_HASHING)
            .with_description("Computes the SHA-384 hash of a binary string.")
            .with_syntax_example("sha384(expression)")
            .with_sql_example(sql_example)
            .with_standard_argument("expression", "String");

        builder.build().unwrap()
    })
}
35 changes: 34 additions & 1 deletion datafusion/functions/src/crypto/sha512.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,12 @@
use super::basic::{sha512, utf8_or_binary_to_binary_type};
use arrow::datatypes::DataType;
use datafusion_common::Result;
use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
use std::any::Any;
use std::sync::OnceLock;

#[derive(Debug)]
pub struct SHA512Func {
Expand Down Expand Up @@ -60,7 +64,36 @@ impl ScalarUDFImpl for SHA512Func {
/// Derives the return type from the type of the first argument by calling
/// `utf8_or_binary_to_binary_type`, passing this function's name along.
///
/// Indexes `arg_types[0]` directly, so an empty slice would panic.
/// NOTE(review): presumably the signature guarantees one argument — confirm.
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
utf8_or_binary_to_binary_type(&arg_types[0], self.name())
}

/// Evaluates the function by forwarding `args` unchanged to the `sha512`
/// helper imported from `super::basic`, returning its result as-is.
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
sha512(args)
}

/// Returns the documentation for this function.
///
/// Always `Some`: the value comes from `get_sha512_doc`, which hands out a
/// `'static` reference.
fn documentation(&self) -> Option<&Documentation> {
Some(get_sha512_doc())
}
}

/// Cache holding the `sha512` documentation once it has been built.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the documentation for the `sha512` function.
///
/// The value is constructed through `OnceLock::get_or_init`, so the builder
/// closure runs at most once and every call receives the same reference.
///
/// # Panics
///
/// Panics if the builder's `build()` result cannot be unwrapped.
fn get_sha512_doc() -> &'static Documentation {
    DOCUMENTATION.get_or_init(|| {
        Documentation::builder()
            .with_doc_section(DOC_SECTION_HASHING)
            .with_description("Computes the SHA-512 hash of a binary string.")
            .with_syntax_example("sha512(expression)")
            .with_sql_example(
                r#"```sql
> select sha512('foo');
+-------------------------------------------+
| sha512(Utf8("foo")) |
+-------------------------------------------+
| <sha512_hash_result> |
+-------------------------------------------+
```"#,
            )
            // Describe `expression` through the standard-argument helper, as the
            // sibling hash functions do. Plain `with_argument` would record the
            // bare text "String" as the whole argument description.
            .with_standard_argument("expression", "String")
            .build()
            .unwrap()
    })
}
84 changes: 0 additions & 84 deletions docs/source/user-guide/sql/scalar_functions.md
Original file line number Diff line number Diff line change
Expand Up @@ -3066,90 +3066,6 @@ select map_values(map([100, 5], [42,43]));
[42, 43]
```

## Hashing Functions

- [digest](#digest)
- [md5](#md5)
- [sha256](#sha256)
- [sha384](#sha384)
- [sha512](#sha512)

### `digest`

Computes the binary hash of an expression using the specified algorithm.

```
digest(expression, algorithm)
```

#### Arguments

- **expression**: String expression to operate on.
Can be a constant, column, or function, and any combination of string operators.
- **algorithm**: String expression specifying algorithm to use.
Must be one of:

- md5
- sha224
- sha256
- sha384
- sha512
- blake2s
- blake2b
- blake3

### `md5`

Computes an MD5 128-bit checksum for a string expression.

```
md5(expression)
```

#### Arguments

- **expression**: String expression to operate on.
Can be a constant, column, or function, and any combination of string operators.

### `sha256`

Computes the SHA-256 hash of a binary string.

```
sha256(expression)
```

#### Arguments

- **expression**: String expression to operate on.
Can be a constant, column, or function, and any combination of string operators.

### `sha384`

Computes the SHA-384 hash of a binary string.

```
sha384(expression)
```

#### Arguments

- **expression**: String expression to operate on.
Can be a constant, column, or function, and any combination of string operators.

### `sha512`

Computes the SHA-512 hash of a binary string.

```
sha512(expression)
```

#### Arguments

- **expression**: String expression to operate on.
Can be a constant, column, or function, and any combination of string operators.

## Other Functions

- [arrow_cast](#arrow_cast)
Expand Down
Loading

0 comments on commit d5d9d30

Please sign in to comment.