Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix block-comment parsing in the dump parser and install PostgreSQL 13 client tools #303

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
[workspace]
exclude = ["db/*", "assets/*"]
members = ["dump-parser", "replibyte", "subset"]
members = ["dump-parser", "replibyte", "subset", "superset"]
14 changes: 10 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ COPY ./dump-parser ./dump-parser

# subset
COPY ./subset ./subset
COPY ./superset ./superset

# replibyte
COPY ./replibyte/Cargo.toml ./replibyte/Cargo.toml
Expand All @@ -30,6 +31,7 @@ RUN rm src/*.rs
COPY ./replibyte/src ./replibyte/src
COPY ./dump-parser/src ./dump-parser/src
COPY ./subset/src ./subset/src
COPY ./superset/src ./superset/src

# build for release
RUN rm ./target/release/deps/replibyte*
Expand All @@ -42,10 +44,14 @@ FROM debian:buster-slim
LABEL org.opencontainers.image.source https://github.com/qovery/replibyte

# Install Postgres and MySQL binaries
RUN apt-get clean && apt-get update && apt-get install -y \
wget \
postgresql-client \
default-mysql-client
RUN apt-get clean && apt-get update
RUN apt-get install -y wget gnupg2 lsb-release
RUN wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add -

RUN echo "deb http://apt.postgresql.org/pub/repos/apt/ `lsb_release -cs`-pgdg main" | tee /etc/apt/sources.list.d/pgdg.list

RUN apt-get update
RUN apt-get install -y postgresql-client-13

# Install MongoDB tools
RUN wget https://fastdl.mongodb.org/tools/db/mongodb-database-tools-debian92-x86_64-100.5.2.deb && \
Expand Down
81 changes: 72 additions & 9 deletions dump-parser/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ fn list_statements(query: &str) -> Vec<Statement> {

let mut is_statement_complete = true;
let mut is_comment_line = false;
let mut is_block_comment_line = false;
let mut is_partial_comment_line = false;
let mut start_index = 0usize;
let mut previous_chars_are_whitespaces = true;
Expand All @@ -186,7 +187,7 @@ fn list_statements(query: &str) -> Vec<Statement> {
is_comment_line = false;
previous_chars_are_whitespaces = true;
}
b'\'' if !is_comment_line && !is_partial_comment_line => {
b'\'' if !is_comment_line && !is_partial_comment_line && !is_block_comment_line => {
if stack.get(0) == Some(&b'\'') {
if (query.len() > next_idx) && &query[next_idx..next_idx] == "'" {
// do nothing because the ' char is escaped via a double ''
Expand All @@ -204,14 +205,15 @@ fn list_statements(query: &str) -> Vec<Statement> {
}
b'(' if !is_comment_line
&& !is_partial_comment_line
&& !is_block_comment_line
&& stack.get(0) != Some(&b'\'') =>
{
stack.insert(0, byte_char);
is_statement_complete = false;
is_comment_line = false;
previous_chars_are_whitespaces = false;
}
b')' if !is_comment_line && !is_partial_comment_line => {
b')' if !is_comment_line && !is_partial_comment_line && !is_block_comment_line => {
if stack.get(0) == Some(&b'(') {
let _ = stack.remove(0);
} else if stack.get(0) != Some(&b'\'') {
Expand Down Expand Up @@ -240,12 +242,49 @@ fn list_statements(query: &str) -> Vec<Statement> {
is_partial_comment_line = true;
previous_chars_are_whitespaces = false;
}
b'\n' if !is_comment_line && !is_partial_comment_line && is_statement_complete => {
b'/' if !is_block_comment_line
&& previous_chars_are_whitespaces
&& is_statement_complete
&& next_idx < query_bytes.len() && query_bytes[next_idx] == b'*' =>
{
// comment
is_block_comment_line = true;
previous_chars_are_whitespaces = false;
}
b'/' if !is_statement_complete
&& next_idx < query_bytes.len() && query_bytes[next_idx] == b'*'
&& stack.get(0) != Some(&b'\'') =>
{
// comment
is_block_comment_line = true;
previous_chars_are_whitespaces = false;
}

b'*' if is_block_comment_line
&& previous_chars_are_whitespaces
&& is_statement_complete
&& next_idx < query_bytes.len() && query_bytes[next_idx] == b'/' =>
{
// comment
is_block_comment_line = false;
previous_chars_are_whitespaces = false;
}
b'*' if !is_statement_complete
&& next_idx < query_bytes.len() && query_bytes[next_idx] == b'/'
&& stack.get(0) != Some(&b'\'') =>
{
// comment
is_block_comment_line = false;
previous_chars_are_whitespaces = false;
}

b'\n' if !is_comment_line && !is_partial_comment_line && !is_block_comment_line && is_statement_complete => {
previous_chars_are_whitespaces = true;
sql_statements.push(Statement::NewLine);
}
b';' if !is_comment_line
&& !is_partial_comment_line
&& !is_block_comment_line
&& stack.get(0) != Some(&b'\'') =>
{
// end of query
Expand Down Expand Up @@ -310,18 +349,42 @@ mod tests {

#[test]
fn check_list_sql_queries_from_dump_reader() {
let r = r#"INSERT INTO public.Users(uuid, "text", name) VALUES ('a84ac0c6-2348-45c0-b86c-8d34e251a859', 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Cras eu nisi tempor, viverra turpis sit amet, sodales augue. Vivamus sit amet erat urna. Morbi porta, quam nec consequat suscipit, ante diam tempus risus, et consequat erat odio sed magna. Maecenas dignissim quam nibh, nec congue magna convallis a.

Etiam augue augue, bibendum et molestie non, finibus non nulla. Etiam quis rhoncus leo, eu congue erat. Cras id magna ac dui convallis ultricies. Donec sed elit ac urna condimentum auctor. Nunc nec nulla id dui feugiat dictum sit amet nec orci. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia curae.


', 'some-name');"#.as_bytes();
let r = r##"CREATE OR REPLACE FUNCTION check_mandatory_attributes_52(
listerp TEXT,
listtype TEXT,
attributes HSTORE
)
RETURNS BOOLEAN AS $$
SELECT CASE
WHEN listerp = 'xero' AND listtype = 'GeneralLedger'
THEN public.defined(attributes, 'AccountID'::text) AND public.defined(attributes, 'Name'::text)
/**
* Don't treat APAccount and Tax as mandatory because we can't currently differentiate between manually
* created list items and sync'ed list items and these two lists often have some manual entries.
*/
WHEN listtype IN ('APAccount', 'Tax')
THEN TRUE
ELSE (
SELECT COUNT(*) = 0
FROM (
SELECT unnest(public.get_unique_attribute_keys_52(listerp, listtype)) AS key_to_check
) AS unique_keys
WHERE NOT public.defined(attributes, key_to_check::text)
)
END
$$

INSERT INTO public.exportformat (id, orgunit_id, rootou_id, name, ftpintegrated, formattemplate, filenametemplate, active, filetype, type) VALUES ('8755D03A3EAA4DE98B39370238A56829', '7EE1E394E6B64B19AFB4B126A518521A', '36CA9CB0BBB44932980AFF87ED8547BC', 'CSV', true, '<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="text" encoding="utf-8" />
</xsl:stylesheet>
', NULL, true, 'CSV', 'invoice');"##.as_bytes();
let reader = BufReader::new(r);

let mut queries = vec![];

list_sql_queries_from_dump_reader(reader, |query| {
queries.push(query.to_string());
println!("list_sql_queries_from_dump_reader {}, ",query.to_string());
ListQueryResult::Continue
});

Expand Down
3 changes: 2 additions & 1 deletion replibyte/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ authors = ["Qovery Team", "Fab", "Benny", "Contributos"]
[dependencies]
dump-parser = { path = "../dump-parser" }
subset = { path = "../subset" }
superset = { path = "../superset" }
rand = "0.8.5"
anyhow = "1.0.56"
serde_yaml = "0.8"
Expand All @@ -34,7 +35,7 @@ flate2 = "1.0"
bson = "2.2"
aes-gcm = "0.9"
which = "4.2.5"
mongodb-schema-parser = { git = "https://github.com/mongodb-rust/mongodb-schema-parser.git", rev = "2d489307dd70b63b216a9968f7dec7c217108b32" }
# mongodb-schema-parser = { git = "https://github.com/mongodb-rust/mongodb-schema-parser.git", rev = "2d489307dd70b63b216a9968f7dec7c217108b32" }
url = "2.2.2"
tempfile = "3.3"
ctrlc = "3.2.1"
Expand Down
1 change: 1 addition & 0 deletions replibyte/src/commands/dump.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ where
transformers: &transformers,
skip_config: &skip_config,
database_subset: &source.database_subset,
database_superset: &source.database_superset,
only_tables: &only_tables_config,
};

Expand Down
19 changes: 15 additions & 4 deletions replibyte/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ pub struct SourceConfig {
pub transformers: Option<Vec<TransformerConfig>>,
pub skip: Option<Vec<SkipConfig>>,
pub database_subset: Option<DatabaseSubsetConfig>,
pub database_superset: Option<DatabaseSupersetConfig>,
pub only_tables: Option<Vec<OnlyTablesConfig>>,
}

Expand Down Expand Up @@ -241,11 +242,21 @@ pub struct DatabaseSubsetConfig {
pub database: String,
pub table: String,
#[serde(flatten)]
pub strategy: DatabaseSubsetConfigStrategy,
pub strategy: DatabaseScaleConfigStrategy,
// copy the entire table - not affected by the subset algorithm
pub passthrough_tables: Option<Vec<String>>,
}

/// Configuration for the database "superset" feature on a source.
///
/// Mirrors the shape of `DatabaseSubsetConfig`: the chosen scaling
/// `strategy` is flattened by serde, so its `strategy_name` /
/// `strategy_options` keys appear inline alongside `database` and
/// `table` in the config file.
#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
pub struct DatabaseSupersetConfig {
/// Name of the database the superset applies to.
pub database: String,
/// Table the superset strategy is applied to (presumably the seed
/// table for the algorithm, by analogy with the subset config —
/// TODO confirm against the superset implementation).
pub table: String,
#[serde(flatten)]
pub strategy: DatabaseScaleConfigStrategy,
// copy the entire table - not affected by the superset algorithm
pub passthrough_tables: Option<Vec<String>>,
}

#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
pub struct OnlyTablesConfig {
pub database: String,
Expand All @@ -255,12 +266,12 @@ pub struct OnlyTablesConfig {
#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
#[serde(rename_all = "kebab-case")]
#[serde(tag = "strategy_name", content = "strategy_options")]
pub enum DatabaseSubsetConfigStrategy {
Random(DatabaseSubsetConfigStrategyRandom),
pub enum DatabaseScaleConfigStrategy {
Random(DatabaseScaleConfigStrategyRandom),
}

#[derive(Debug, Serialize, Deserialize, PartialEq, Clone, Copy)]
pub struct DatabaseSubsetConfigStrategyRandom {
pub struct DatabaseScaleConfigStrategyRandom {
pub percent: u8,
}

Expand Down
7 changes: 3 additions & 4 deletions replibyte/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use migration::{migrations, Migrator};
use utils::get_replibyte_version;

use crate::cli::{DumpCommand, RestoreCommand, SubCommand, TransformerCommand, CLI, SourceCommand};
use crate::config::{Config, DatabaseSubsetConfig, DatastoreConfig};
use crate::config::{Config, DatabaseSubsetConfig, DatabaseSupersetConfig, DatastoreConfig};
use crate::datastore::local_disk::LocalDisk;
use crate::datastore::s3::S3;
use crate::datastore::Datastore;
Expand Down Expand Up @@ -152,11 +152,11 @@ fn run(config: Config, sub_commands: &SubCommand) -> anyhow::Result<()> {
RestoreCommand::Remote(args) => if args.output {},
},
_ => {
let _ = thread::spawn(move || show_progress_bar(rx_pb));
// let _ = thread::spawn(move || show_progress_bar(rx_pb));
}
},
_ => {
let _ = thread::spawn(move || show_progress_bar(rx_pb));
// let _ = thread::spawn(move || show_progress_bar(rx_pb));
}
};

Expand All @@ -174,7 +174,6 @@ fn run(config: Config, sub_commands: &SubCommand) -> anyhow::Result<()> {
if let Some(name) = &args.name {
datastore.set_dump_name(name.to_string());
}

commands::dump::run(args, datastore, config, progress_callback)
}
DumpCommand::Delete(args) => commands::dump::delete(datastore, args),
Expand Down
3 changes: 2 additions & 1 deletion replibyte/src/source/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::io::Error;

use crate::config::{DatabaseSubsetConfig, OnlyTablesConfig, SkipConfig};
use crate::config::{DatabaseSubsetConfig, DatabaseSupersetConfig, OnlyTablesConfig, SkipConfig};
use crate::connector::Connector;
use crate::transformer::Transformer;
use crate::types::{OriginalQuery, Query};
Expand All @@ -27,6 +27,7 @@ pub trait Source: Connector {
/// Borrowed bundle of per-dump options handed to a `Source` implementation.
///
/// All fields are references owned by the caller for lifetime `'a`;
/// the `Option` configs are `None` when the corresponding feature is
/// not enabled in the user's configuration.
pub struct SourceOptions<'a> {
/// Transformers to apply while reading the dump.
pub transformers: &'a Vec<Box<dyn Transformer>>,
/// Skip rules (tables/content to omit from the dump).
pub skip_config: &'a Vec<SkipConfig>,
/// Superset configuration, if enabled (not supported by every source;
/// MongoDB sources currently reject it with `todo!`).
pub database_superset: &'a Option<DatabaseSupersetConfig>,
/// Subset configuration, if enabled.
pub database_subset: &'a Option<DatabaseSubsetConfig>,
/// Restrict the dump to only these tables.
pub only_tables: &'a Vec<OnlyTablesConfig>,
}
27 changes: 17 additions & 10 deletions replibyte/src/source/mongodb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use crate::SourceOptions;

use bson::{Bson, Document};
use dump_parser::mongodb::Archive;
use mongodb_schema_parser::SchemaParser;
// use mongodb_schema_parser::SchemaParser;

pub struct MongoDB<'a> {
uri: &'a str,
Expand Down Expand Up @@ -73,6 +73,10 @@ impl<'a> Source for MongoDB<'a> {
todo!("database subset not supported yet for MongoDB source")
}

if let Some(_database_superset) = &options.database_superset {
todo!("database superset not supported yet for MongoDB source")
}

let dump_args = vec![
"--uri",
self.uri,
Expand Down Expand Up @@ -308,20 +312,20 @@ pub fn read_and_parse_schema<R: Read>(reader: BufReader<R>) -> Result<(), Error>

table.set_titles(row![format!("Collection {}", name)]);

let mut schema_parser = SchemaParser::new();
// let mut schema_parser = SchemaParser::new();

for doc in collection {
schema_parser.write_bson(doc).unwrap();
}
// for doc in collection {
// schema_parser.write_bson(doc).unwrap();
// }

let schema = schema_parser.flush();
// let schema = schema_parser.flush();

let json_data = serde_json::to_string_pretty(&schema).unwrap();
// let json_data = serde_json::to_string_pretty(&schema).unwrap();

table.add_row(row![name]);
table.add_row(row![json_data]);
// table.add_row(row![name]);
// table.add_row(row![json_data]);

let _ = table.printstd();
// let _ = table.printstd();
}
});

Expand Down Expand Up @@ -367,6 +371,7 @@ mod tests {
transformers: &transformers,
skip_config: &vec![],
database_subset: &None,
database_superset: &None,
only_tables: &vec![],
};

Expand All @@ -379,6 +384,7 @@ mod tests {
transformers: &transformers,
skip_config: &vec![],
database_subset: &None,
database_superset: &None,
only_tables: &vec![],
};

Expand All @@ -394,6 +400,7 @@ mod tests {
transformers: &transformers,
skip_config: &vec![],
database_subset: &None,
database_superset: &None,
only_tables: &vec![],
};

Expand Down
4 changes: 4 additions & 0 deletions replibyte/src/source/mongodb_stdin.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ impl Source for MongoDBStdin {
todo!("database subset not supported yet for MongoDB source")
}

if let Some(_database_superset) = &options.database_superset {
todo!("database superset not supported yet for MongoDB source")
}

let _ = read_and_transform(reader, options, query_callback)?;
Ok(())
}
Expand Down
2 changes: 2 additions & 0 deletions replibyte/src/source/mysql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,7 @@ mod tests {
transformers: &transformers,
skip_config: &vec![],
database_subset: &None,
database_superset: &None,
only_tables: &vec![],
};

Expand All @@ -467,6 +468,7 @@ mod tests {
transformers: &transformers,
skip_config: &vec![],
database_subset: &None,
database_superset: &None,
only_tables: &vec![],
};
assert!(p
Expand Down
Loading