-
-
Notifications
You must be signed in to change notification settings - Fork 120
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: schema detection for k8s events
Framework setup for schema detection at the time of ingestion. Sample JSON added for k8s events. Formats file added that holds the list of sample JSONs and the schema type of all known log sources. The server loads the known schemas at initialization. For incoming events, it checks whether the schema of the incoming events matches any of the known schemas; if yes, it adds `schema_type` to the stream info. Custom flattening is required before storing the schema and ingesting to Parseable for those events which have a hierarchical structure.
- Loading branch information
1 parent
28b984a
commit 2ba13fc
Showing
17 changed files
with
910 additions
and
35 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
/* | ||
* Parseable Server (C) 2022 - 2024 Parseable, Inc. | ||
* | ||
* This program is free software: you can redistribute it and/or modify | ||
* it under the terms of the GNU Affero General Public License as | ||
* published by the Free Software Foundation, either version 3 of the | ||
* License, or (at your option) any later version. | ||
* | ||
* This program is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
* GNU Affero General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU Affero General Public License | ||
* along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
* | ||
*/ | ||
|
||
use arrow_json::reader::infer_json_schema_from_iterator; | ||
use arrow_schema::Schema; | ||
use once_cell::sync::OnceCell; | ||
use std::collections::HashMap; | ||
|
||
use crate::{event::format::update_data_type_to_datetime, utils::json::flatten_json_body}; | ||
|
||
/// Static table of known schemas for internal usage, keyed by schema type name.
///
/// Written once through `prepare_known_schema_list` and read back through
/// `get_known_schema_list`; both panic if that write-once/read-after-write order
/// is violated.
pub static KNOWN_SCHEMA_LIST: OnceCell<HashMap<String, Schema>> = OnceCell::new(); | ||
|
||
pub fn detect_schema() -> HashMap<String, Schema> { | ||
let mut known_schema_list: HashMap<String, Schema> = HashMap::new(); | ||
//read file formats.json | ||
let formats_file = std::fs::File::open("src/event/known-formats/formats.json").unwrap(); | ||
let formats_reader = std::io::BufReader::new(formats_file); | ||
let formats: serde_json::Value = serde_json::from_reader(formats_reader).unwrap(); | ||
//iterate over the formats | ||
for format in formats.as_array().unwrap() { | ||
let schema_type = format["schema_type"].as_str().unwrap(); | ||
let sample_json_path = format["sample_json_path"].as_str().unwrap(); | ||
let sample_file = std::fs::File::open(sample_json_path).unwrap(); | ||
let sample_reader = std::io::BufReader::new(sample_file); | ||
let sample_json: serde_json::Value = serde_json::from_reader(sample_reader).unwrap(); | ||
let flattened_json = flatten_json_body(sample_json, None, None, None, false).unwrap(); | ||
let sample_json_records = [flattened_json.clone()]; | ||
let mut schema = | ||
infer_json_schema_from_iterator(sample_json_records.iter().map(Ok)).unwrap(); | ||
schema = update_data_type_to_datetime(schema, flattened_json, Vec::new()); | ||
known_schema_list.insert(schema_type.to_string(), schema); | ||
} | ||
prepare_known_schema_list(known_schema_list.clone()); | ||
known_schema_list | ||
} | ||
|
||
pub fn prepare_known_schema_list(known_schema_list: HashMap<String, Schema>) { | ||
KNOWN_SCHEMA_LIST | ||
.set(known_schema_list) | ||
.expect("only set once") | ||
} | ||
|
||
pub fn get_known_schema_list() -> &'static HashMap<String, Schema> { | ||
KNOWN_SCHEMA_LIST | ||
.get() | ||
.expect("fetch schema list from static variable") | ||
} | ||
|
||
pub fn validate_schema_type(schema: &Schema) -> String { | ||
let known_schema_list = get_known_schema_list(); | ||
let mut schema_type = String::default(); | ||
for (known_schema_type, known_schema) in known_schema_list.iter() { | ||
if known_schema == schema { | ||
schema_type = known_schema_type.to_string(); | ||
break; | ||
} | ||
} | ||
schema_type | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
[ | ||
{ | ||
"name": "kubernetes", | ||
"schema_type": "kubernetes-events", | ||
"sample_json_path": "src/event/known-formats/kubernetes-events-sample/kubernetes-events-sample.json" | ||
} | ||
] |
236 changes: 236 additions & 0 deletions
236
src/event/known-formats/kubernetes-events-sample/kubernetes-events-sample.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,236 @@ | ||
{ | ||
"apiVersion": "v1", | ||
"items": [ | ||
{ | ||
"apiVersion": "v1", | ||
"count": 1, | ||
"eventTime": null, | ||
"firstTimestamp": "2024-11-08T10:17:18Z", | ||
"involvedObject": { | ||
"apiVersion": "v1", | ||
"fieldPath": "spec.containers{vantage-kubernetes-agent}", | ||
"kind": "Pod", | ||
"name": "vka-vantage-kubernetes-agent-0", | ||
"namespace": "vantage", | ||
"resourceVersion": "15629581", | ||
"uid": "3fa579b0-0c6f-4f44-a320-69389c8f607a" | ||
}, | ||
"kind": "Event", | ||
"lastTimestamp": "2024-11-08T10:17:18Z", | ||
"message": "Stopping container vantage-kubernetes-agent", | ||
"metadata": { | ||
"creationTimestamp": "2024-11-08T10:17:18Z", | ||
"name": "vka-vantage-kubernetes-agent-0.1805f6d7de4bc710", | ||
"namespace": "vantage", | ||
"resourceVersion": "25741805", | ||
"uid": "629a5864-06de-414d-8ad7-b7637b8cbfa0" | ||
}, | ||
"reason": "Killing", | ||
"reportingComponent": "kubelet", | ||
"reportingInstance": "ip-10-0-2-170.ec2.internal", | ||
"source": { | ||
"component": "kubelet", | ||
"host": "ip-10-0-2-170.ec2.internal" | ||
}, | ||
"type": "Normal" | ||
}, | ||
{ | ||
"apiVersion": "v1", | ||
"count": 1, | ||
"eventTime": null, | ||
"firstTimestamp": "2024-11-08T10:17:19Z", | ||
"involvedObject": { | ||
"apiVersion": "v1", | ||
"kind": "Pod", | ||
"name": "vka-vantage-kubernetes-agent-0", | ||
"namespace": "vantage", | ||
"resourceVersion": "25741822", | ||
"uid": "0118c8be-55df-40bf-96ed-41bb11b5a771" | ||
}, | ||
"kind": "Event", | ||
"lastTimestamp": "2024-11-08T10:17:19Z", | ||
"message": "Successfully assigned vantage/vka-vantage-kubernetes-agent-0 to ip-10-0-2-170.ec2.internal", | ||
"metadata": { | ||
"creationTimestamp": "2024-11-08T10:17:19Z", | ||
"name": "vka-vantage-kubernetes-agent-0.1805f6d80c652af1", | ||
"namespace": "vantage", | ||
"resourceVersion": "25741826", | ||
"uid": "e1dab7eb-ab65-44be-9b75-2f400cd70275" | ||
}, | ||
"reason": "Scheduled", | ||
"reportingComponent": "default-scheduler", | ||
"reportingInstance": "", | ||
"source": { | ||
"component": "default-scheduler" | ||
}, | ||
"type": "Normal" | ||
}, | ||
{ | ||
"apiVersion": "v1", | ||
"count": 1, | ||
"eventTime": null, | ||
"firstTimestamp": "2024-11-08T10:17:22Z", | ||
"involvedObject": { | ||
"apiVersion": "v1", | ||
"fieldPath": "spec.containers{vantage-kubernetes-agent}", | ||
"kind": "Pod", | ||
"name": "vka-vantage-kubernetes-agent-0", | ||
"namespace": "vantage", | ||
"resourceVersion": "25741823", | ||
"uid": "0118c8be-55df-40bf-96ed-41bb11b5a771" | ||
}, | ||
"kind": "Event", | ||
"lastTimestamp": "2024-11-08T10:17:22Z", | ||
"message": "Container image \"quay.io/vantage-sh/kubernetes-agent:1.0.26\" already present on machine", | ||
"metadata": { | ||
"creationTimestamp": "2024-11-08T10:17:22Z", | ||
"name": "vka-vantage-kubernetes-agent-0.1805f6d8d0c1d741", | ||
"namespace": "vantage", | ||
"resourceVersion": "25741846", | ||
"uid": "6c9c24bb-4ff3-486f-8151-91d1dad159ee" | ||
}, | ||
"reason": "Pulled", | ||
"reportingComponent": "kubelet", | ||
"reportingInstance": "ip-10-0-2-170.ec2.internal", | ||
"source": { | ||
"component": "kubelet", | ||
"host": "ip-10-0-2-170.ec2.internal" | ||
}, | ||
"type": "Normal" | ||
}, | ||
{ | ||
"apiVersion": "v1", | ||
"count": 1, | ||
"eventTime": null, | ||
"firstTimestamp": "2024-11-08T10:17:22Z", | ||
"involvedObject": { | ||
"apiVersion": "v1", | ||
"fieldPath": "spec.containers{vantage-kubernetes-agent}", | ||
"kind": "Pod", | ||
"name": "vka-vantage-kubernetes-agent-0", | ||
"namespace": "vantage", | ||
"resourceVersion": "25741823", | ||
"uid": "0118c8be-55df-40bf-96ed-41bb11b5a771" | ||
}, | ||
"kind": "Event", | ||
"lastTimestamp": "2024-11-08T10:17:22Z", | ||
"message": "Created container vantage-kubernetes-agent", | ||
"metadata": { | ||
"creationTimestamp": "2024-11-08T10:17:22Z", | ||
"name": "vka-vantage-kubernetes-agent-0.1805f6d8d271c600", | ||
"namespace": "vantage", | ||
"resourceVersion": "25741847", | ||
"uid": "d23e308a-b17e-42ba-a5ed-3a55c3d9e0d2" | ||
}, | ||
"reason": "Created", | ||
"reportingComponent": "kubelet", | ||
"reportingInstance": "ip-10-0-2-170.ec2.internal", | ||
"source": { | ||
"component": "kubelet", | ||
"host": "ip-10-0-2-170.ec2.internal" | ||
}, | ||
"type": "Normal" | ||
}, | ||
{ | ||
"apiVersion": "v1", | ||
"count": 1, | ||
"eventTime": null, | ||
"firstTimestamp": "2024-11-08T10:17:22Z", | ||
"involvedObject": { | ||
"apiVersion": "v1", | ||
"fieldPath": "spec.containers{vantage-kubernetes-agent}", | ||
"kind": "Pod", | ||
"name": "vka-vantage-kubernetes-agent-0", | ||
"namespace": "vantage", | ||
"resourceVersion": "25741823", | ||
"uid": "0118c8be-55df-40bf-96ed-41bb11b5a771" | ||
}, | ||
"kind": "Event", | ||
"lastTimestamp": "2024-11-08T10:17:22Z", | ||
"message": "Started container vantage-kubernetes-agent", | ||
"metadata": { | ||
"creationTimestamp": "2024-11-08T10:17:23Z", | ||
"name": "vka-vantage-kubernetes-agent-0.1805f6d8d87a3795", | ||
"namespace": "vantage", | ||
"resourceVersion": "25741848", | ||
"uid": "e48c06da-3fbf-41a1-8685-6224854f0391" | ||
}, | ||
"reason": "Started", | ||
"reportingComponent": "kubelet", | ||
"reportingInstance": "ip-10-0-2-170.ec2.internal", | ||
"source": { | ||
"component": "kubelet", | ||
"host": "ip-10-0-2-170.ec2.internal" | ||
}, | ||
"type": "Normal" | ||
}, | ||
{ | ||
"apiVersion": "v1", | ||
"count": 1, | ||
"eventTime": null, | ||
"firstTimestamp": "2024-11-08T10:17:23Z", | ||
"involvedObject": { | ||
"apiVersion": "v1", | ||
"fieldPath": "spec.containers{vantage-kubernetes-agent}", | ||
"kind": "Pod", | ||
"name": "vka-vantage-kubernetes-agent-0", | ||
"namespace": "vantage", | ||
"resourceVersion": "25741823", | ||
"uid": "0118c8be-55df-40bf-96ed-41bb11b5a771" | ||
}, | ||
"kind": "Event", | ||
"lastTimestamp": "2024-11-08T10:17:23Z", | ||
"message": "Readiness probe failed: Get \"http://10.0.2.143:9010/healthz\": dial tcp 10.0.2.143:9010: connect: connection refused", | ||
"metadata": { | ||
"creationTimestamp": "2024-11-08T10:17:23Z", | ||
"name": "vka-vantage-kubernetes-agent-0.1805f6d8f61959d7", | ||
"namespace": "vantage", | ||
"resourceVersion": "25741851", | ||
"uid": "6199c62b-9ca5-4c46-abcb-53137ed24c47" | ||
}, | ||
"reason": "Unhealthy", | ||
"reportingComponent": "kubelet", | ||
"reportingInstance": "ip-10-0-2-170.ec2.internal", | ||
"source": { | ||
"component": "kubelet", | ||
"host": "ip-10-0-2-170.ec2.internal" | ||
}, | ||
"type": "Warning" | ||
}, | ||
{ | ||
"apiVersion": "v1", | ||
"count": 1, | ||
"eventTime": null, | ||
"firstTimestamp": "2024-11-08T10:17:19Z", | ||
"involvedObject": { | ||
"apiVersion": "apps/v1", | ||
"kind": "StatefulSet", | ||
"name": "vka-vantage-kubernetes-agent", | ||
"namespace": "vantage", | ||
"resourceVersion": "25741814", | ||
"uid": "3f91d728-f31f-4582-8639-df259d97ac55" | ||
}, | ||
"kind": "Event", | ||
"lastTimestamp": "2024-11-08T10:17:19Z", | ||
"message": "create Pod vka-vantage-kubernetes-agent-0 in StatefulSet vka-vantage-kubernetes-agent successful", | ||
"metadata": { | ||
"creationTimestamp": "2024-11-08T10:17:19Z", | ||
"name": "vka-vantage-kubernetes-agent.1805f6d80bd97994", | ||
"namespace": "vantage", | ||
"resourceVersion": "25741827", | ||
"uid": "c5bf4dee-649f-48ba-b6da-c6ccf4e9262c" | ||
}, | ||
"reason": "SuccessfulCreate", | ||
"reportingComponent": "statefulset-controller", | ||
"reportingInstance": "", | ||
"source": { | ||
"component": "statefulset-controller" | ||
}, | ||
"type": "Normal" | ||
} | ||
], | ||
"kind": "List", | ||
"metadata": { | ||
"resourceVersion": "" | ||
} | ||
} |
Oops, something went wrong.