Skip to content

Commit

Permalink
Data import implemented
Browse files Browse the repository at this point in the history
  • Loading branch information
ucayalifish committed Feb 10, 2018
1 parent f7dadba commit ab0f60f
Show file tree
Hide file tree
Showing 11 changed files with 397 additions and 6 deletions.
31 changes: 28 additions & 3 deletions console.sql
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
-- Start from a clean slate. IF EXISTS keeps the script runnable on a fresh
-- database where raw_data has not been created yet.
DROP TABLE IF EXISTS raw_data;

CREATE TABLE raw_data
(
ssoid TEXT,
Expand All @@ -21,6 +23,14 @@ DELIMITER ';' CSV HEADER;
-- Distinct asubtype values present in raw_data.
SELECT asubtype
FROM raw_data
GROUP BY asubtype;

-- Every (formid, atype, asubtype) combination that occurs, sorted for reading.
SELECT DISTINCT
    formid,
    atype,
    asubtype
FROM raw_data
ORDER BY
    formid,
    atype,
    asubtype;

-- Distinct formid values present in raw_data.
SELECT formid
FROM raw_data
GROUP BY formid;

Expand Down Expand Up @@ -123,6 +133,7 @@ WITH form_counter AS (
AND ssoid IS NOT NULL
AND formid IS NOT NULL
AND formid <> 'null'
AND formid <> ''
GROUP BY ssoid, formid
ORDER BY formid
)
Expand All @@ -131,7 +142,20 @@ SELECT
count(formid) AS fc
FROM form_counter
GROUP BY formid
ORDER BY fc DESC;
ORDER BY fc DESC
LIMIT 5;

-- Unique (ssoid, formid) pairs, leaving out rows whose ssoid is NULL or
-- 'Unauthorized' and rows whose formid is NULL, the literal text 'null', or empty.
SELECT DISTINCT
    ssoid,
    formid
FROM raw_data
WHERE ssoid IS NOT NULL
  AND ssoid <> 'Unauthorized'
  AND formid IS NOT NULL
  AND formid NOT IN ('null', '')
ORDER BY formid;

WITH form_counter AS (
SELECT
Expand All @@ -146,11 +170,12 @@ WITH form_counter AS (
ORDER BY formid
)
SELECT
ssoid, formid
ssoid,
formid
FROM form_counter
GROUP BY ssoid, formid
ORDER BY ssoid;

SELECT ssoid
FROM raw_data
GROUP BY ssoid;
GROUP BY ssoid;
46 changes: 46 additions & 0 deletions csvimporter/csvimporter.iml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
<?xml version="1.0" encoding="UTF-8"?>
<module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4">
<component name="FacetManager">
<facet type="Spring" name="Spring">
<configuration />
</facet>
</component>
<component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_8">
<output url="file://$MODULE_DIR$/target/classes" />
<output-test url="file://$MODULE_DIR$/target/test-classes" />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/src/main/resources" type="java-resource" />
<sourceFolder url="file://$MODULE_DIR$/src/test/java" isTestSource="true" />
<excludeFolder url="file://$MODULE_DIR$/${project.build.directory}/classes" />
<excludeFolder url="file://$MODULE_DIR$/${project.build.directory}/test-classes" />
<excludeFolder url="file://$MODULE_DIR$/target" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" name="Maven: org.springframework.boot:spring-boot-starter:1.5.10.RELEASE" level="project" />
<orderEntry type="library" name="Maven: org.springframework.boot:spring-boot:1.5.10.RELEASE" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-context:4.3.14.RELEASE" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-aop:4.3.14.RELEASE" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-expression:4.3.14.RELEASE" level="project" />
<orderEntry type="library" name="Maven: org.springframework.boot:spring-boot-autoconfigure:1.5.10.RELEASE" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-core:4.3.14.RELEASE" level="project" />
<orderEntry type="library" scope="RUNTIME" name="Maven: org.yaml:snakeyaml:1.17" level="project" />
<orderEntry type="library" name="Maven: org.springframework.boot:spring-boot-starter-logging:1.5.10.RELEASE" level="project" />
<orderEntry type="library" name="Maven: ch.qos.logback:logback-classic:1.1.11" level="project" />
<orderEntry type="library" name="Maven: ch.qos.logback:logback-core:1.1.11" level="project" />
<orderEntry type="library" name="Maven: org.slf4j:slf4j-api:1.7.22" level="project" />
<orderEntry type="library" name="Maven: org.slf4j:jcl-over-slf4j:1.7.25" level="project" />
<orderEntry type="library" name="Maven: org.slf4j:jul-to-slf4j:1.7.25" level="project" />
<orderEntry type="library" name="Maven: org.slf4j:log4j-over-slf4j:1.7.25" level="project" />
<orderEntry type="library" name="Maven: org.springframework.boot:spring-boot-starter-jdbc:1.5.10.RELEASE" level="project" />
<orderEntry type="library" name="Maven: org.apache.tomcat:tomcat-jdbc:8.5.27" level="project" />
<orderEntry type="library" name="Maven: org.apache.tomcat:tomcat-juli:8.5.27" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-jdbc:4.3.14.RELEASE" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-beans:4.3.14.RELEASE" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-tx:4.3.14.RELEASE" level="project" />
<orderEntry type="library" name="Maven: org.postgresql:postgresql:42.2.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.commons:commons-csv:1.5" level="project" />
<orderEntry type="module" module-name="datautils" />
</component>
</module>
64 changes: 64 additions & 0 deletions csvimporter/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">

<parent>
<artifactId>java-csv-task</artifactId>
<groupId>ru.ffyud.trials</groupId>
<version>1.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

<artifactId>csv-importer</artifactId>

<dependencies>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter</artifactId>
<version>1.5.10.RELEASE</version>
</dependency>

<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-logging</artifactId>
<version>1.5.10.RELEASE</version>
</dependency>

<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-jdbc</artifactId>
<version>1.5.10.RELEASE</version>
</dependency>

<dependency>
<groupId>org.postgresql</groupId>
<artifactId>postgresql</artifactId>
</dependency>

<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-csv</artifactId>
</dependency>

<dependency>
<groupId>ru.ffyud.trials</groupId>
<artifactId>data-utils</artifactId>
<version>${version.to.use}</version>
</dependency>
</dependencies>

<build>
<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
</plugin>
</plugins>
</build>

</project>
13 changes: 13 additions & 0 deletions csvimporter/src/main/java/META-INF/MANIFEST.MF
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
Manifest-Version: 1.0
Class-Path: logback-classic-1.1.11.jar slf4j-api-1.7.25.jar postgresql
-42.2.1.jar spring-boot-starter-logging-1.5.10.RELEASE.jar log4j-over
-slf4j-1.7.25.jar jcl-over-slf4j-1.7.25.jar snakeyaml-1.17.jar spring
-boot-autoconfigure-1.5.10.RELEASE.jar tomcat-juli-8.5.27.jar spring-
jdbc-4.3.14.RELEASE.jar spring-aop-4.3.14.RELEASE.jar spring-expressi
on-4.3.14.RELEASE.jar jul-to-slf4j-1.7.25.jar spring-tx-4.3.14.RELEAS
E.jar spring-boot-starter-1.5.10.RELEASE.jar spring-boot-starter-jdbc
-1.5.10.RELEASE.jar spring-boot-1.5.10.RELEASE.jar spring-core-4.3.14
.RELEASE.jar logback-core-1.1.11.jar spring-beans-4.3.14.RELEASE.jar
spring-context-4.3.14.RELEASE.jar tomcat-jdbc-8.5.27.jar
Main-Class: ru.ffyud.trials.csvimporter.CsvImporter

144 changes: 144 additions & 0 deletions csvimporter/src/main/java/ru/ffyud/trials/csvimporter/CsvImporter.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
package ru.ffyud.trials.csvimporter;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.ApplicationArguments;
import org.springframework.boot.ApplicationRunner;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.jdbc.core.BatchPreparedStatementSetter;
import org.springframework.jdbc.core.JdbcTemplate;
import ru.ffyud.trials.csvdata.Fields;

import java.io.FileReader;
import java.io.Reader;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

/**
 * Spring Boot command-line application that loads a semicolon-delimited CSV file
 * into the {@code raw_data} table.
 *
 * <p>The file path is taken from the {@code --data.csv=<path>} option. Records whose
 * {@code ssoid} or {@code formid} carry no usable value are rejected (see
 * {@link #recordIsUseless(CSVRecord)}); the remaining records are buffered in memory
 * and then inserted in batches.
 */
@SpringBootApplication
public class CsvImporter implements ApplicationRunner {

    private static final Logger logger = LoggerFactory.getLogger(CsvImporter.class);

    /** Name of the command-line option that carries the path of the CSV file. */
    private static final String SOURCE_OPTION = "data.csv";

    /** Maximum number of rows sent to the database in one JDBC batch. */
    private static final int BATCH_SIZE = 500;

    /** Parameterized insert; parameter order matches the binding order in {@link #doBatch(List)}. */
    private static final String INSERT_SQL =
            "INSERT INTO raw_data "
                    + "(ssoid, ts, grp, atype, asubtype, url, orgid, formid, code, ltpa, sudirresponse, ymdh) "
                    + "VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);";

    private final JdbcTemplate jdbcTemplate;

    // The test data set is of such a size that I decided to accumulate it in memory.
    // Should it grow, a buffer like this would be needed one way or another.
    private final List<CSVRecord> inMemBuffer = new ArrayList<>();

    /**
     * @param jdbcTemplate template used for the batched inserts into {@code raw_data}
     */
    @Autowired
    public CsvImporter(JdbcTemplate jdbcTemplate) {
        this.jdbcTemplate = jdbcTemplate;
    }

    /** Queues an accepted record for the later batched insert. */
    private void processCSVRecord(final CSVRecord record) {
        inMemBuffer.add(record);
    }

    /**
     * Inserts everything collected in {@link #inMemBuffer}, at most {@link #BATCH_SIZE} rows
     * per JDBC batch.
     *
     * <p>The loop advances by buffer position, not by the row counts the driver reports.
     * Those counts may be lower than the number of rows sent (for example
     * {@code Statement.SUCCESS_NO_INFO}), so using them as the loop condition could
     * keep the loop running forever once the buffer has been exhausted.
     *
     * @return number of rows reported as inserted
     */
    private long batchInMemBuffer() {
        long total = 0;
        final int size = inMemBuffer.size();
        for (int start = 0; start < size; start += BATCH_SIZE) {
            final int end = Math.min(start + BATCH_SIZE, size);
            // subList is a view: no per-batch copy and no re-scan from the head of the buffer.
            total += doBatch(inMemBuffer.subList(start, end));
        }
        return total;
    }

    /**
     * Sends one batch of records to the database.
     *
     * @param batch records to insert; every element becomes one row
     * @return number of rows reported as inserted for this batch
     */
    private long doBatch(final List<CSVRecord> batch) {
        final int[] counts = jdbcTemplate.batchUpdate(INSERT_SQL, new BatchPreparedStatementSetter() {
            @Override
            public void setValues(PreparedStatement ps, int i) throws SQLException {
                final CSVRecord r = batch.get(i);
                ps.setString(1, r.get(Fields.ssoid));
                ps.setString(2, r.get(Fields.ts));
                ps.setString(3, r.get(Fields.grp));
                ps.setString(4, r.get(Fields.type));      // stored in column atype
                ps.setString(5, r.get(Fields.subtype));   // stored in column asubtype
                ps.setString(6, r.get(Fields.url));
                ps.setString(7, r.get(Fields.orgid));
                ps.setString(8, r.get(Fields.formid));
                ps.setString(9, r.get(Fields.code));
                ps.setString(10, r.get(Fields.ltpa));
                ps.setString(11, r.get(Fields.sudirresponse));
                ps.setString(12, r.get(Fields.ymdh));
            }

            @Override
            public int getBatchSize() {
                return batch.size();
            }
        });

        // Each statement inserts exactly one row, so "succeeded, count unknown" is counted
        // as one row; any other negative status contributes nothing.
        final long saved = Arrays.stream(counts)
                .mapToLong(c -> c == java.sql.Statement.SUCCESS_NO_INFO ? 1L : Math.max(c, 0))
                .sum();
        logger.info("{} items saved", saved);
        return saved;
    }

    /**
     * Decides whether a record should be left out of the import.
     *
     * @return {@code true} when {@code ssoid} is null, empty or starts with "Unauthorized",
     *         or when {@code formid} is null, empty or starts with "null"
     */
    private boolean recordIsUseless(CSVRecord r) {
        final String ssoid = r.get(Fields.ssoid);
        if (ssoid == null || ssoid.isEmpty() || ssoid.startsWith("Unauthorized")) {
            return true;
        }
        final String formid = r.get(Fields.formid);
        return formid == null || formid.isEmpty() || formid.startsWith("null");
    }

    /**
     * Reads the CSV file named by {@code --data.csv}, filters its records and imports the rest.
     *
     * @param args application arguments; must contain the {@code data.csv} option with a value
     * @throws IllegalArgumentException if the {@code data.csv} option is missing or has no value
     * @throws Exception                if the file cannot be read or the insert fails
     */
    @Override
    public void run(ApplicationArguments args) throws Exception {
        // getOptionValues yields null when the option is absent and an empty list when it
        // was given without a value; fail with a usable message in both cases.
        final List<String> sources = args.getOptionValues(SOURCE_OPTION);
        if (sources == null || sources.isEmpty()) {
            throw new IllegalArgumentException(
                    "Missing required option --" + SOURCE_OPTION + "=<path to CSV file>");
        }
        final String sourcePath = sources.get(0);
        logger.info("Start processing '{}'", sourcePath);

        // Column names come from the Fields enum; the header line of the file is skipped.
        final CSVFormat format = CSVFormat.EXCEL.withHeader(Fields.class)
                .withDelimiter(';')
                .withSkipHeaderRecord(true);

        // try-with-resources closes the reader even if creating the parser fails.
        try (Reader in = new FileReader(sourcePath);
             CSVParser parser = new CSVParser(in, format)) {
            int total = 0;
            int rejected = 0;
            for (final CSVRecord record : parser) {
                total += 1;
                if (recordIsUseless(record)) {
                    rejected += 1;
                } else {
                    processCSVRecord(record);
                }
            }
            logger.info("{} records seen, {} records rejected", total, rejected);

            final long saved = batchInMemBuffer();

            logger.info("{} records imported", saved);
        }
    }

    public static void main(String[] args) {
        SpringApplication.run(CsvImporter.class, args);
    }
}
8 changes: 8 additions & 0 deletions csvimporter/src/main/resources/application.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Database connection used by the CSV importer.
# URL, user name and password can be overridden without editing this file by
# setting CSVIMPORTER_DB_URL / CSVIMPORTER_DB_USERNAME / CSVIMPORTER_DB_PASSWORD
# (environment variable or any other Spring property source). The text after the
# first colon in each placeholder is the default and equals the previous fixed value.
spring.datasource.driver-class-name=org.postgresql.Driver
spring.datasource.url=${CSVIMPORTER_DB_URL:jdbc:postgresql://192.168.99.100:5432/postgres}
spring.datasource.username=${CSVIMPORTER_DB_USERNAME:postgres}
spring.datasource.password=${CSVIMPORTER_DB_PASSWORD:secretpass}
# Tomcat JDBC connection-pool settings.
spring.datasource.tomcat.max-wait=20000
spring.datasource.tomcat.max-active=50
spring.datasource.tomcat.max-idle=20
spring.datasource.tomcat.min-idle=15
15 changes: 15 additions & 0 deletions datautils/datautils.iml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<?xml version="1.0" encoding="UTF-8"?>
<module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4">
<component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_8">
<output url="file://$MODULE_DIR$/target/classes" />
<output-test url="file://$MODULE_DIR$/target/test-classes" />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/src/main/resources" type="java-resource" />
<sourceFolder url="file://$MODULE_DIR$/src/test/java" isTestSource="true" />
<excludeFolder url="file://$MODULE_DIR$/target" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>
26 changes: 26 additions & 0 deletions datautils/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>java-csv-task</artifactId>
<groupId>ru.ffyud.trials</groupId>
<version>1.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

<packaging>jar</packaging>

<artifactId>data-utils</artifactId>


<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
</plugin>
</plugins>
</build>

</project>
5 changes: 5 additions & 0 deletions datautils/src/main/java/ru/ffyud/trials/csvdata/Fields.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
package ru.ffyud.trials.csvdata;

/**
 * Column names of the imported CSV data.
 *
 * <p>CsvImporter hands this enum to {@code CSVFormat.withHeader}, so the constants
 * serve as the header of the parsed file; keep their declaration order unchanged.
 */
public enum Fields {
    ssoid,
    ts,
    grp,
    type,
    subtype,
    url,
    orgid,
    formid,
    code,
    ltpa,
    sudirresponse,
    ymdh
}
Loading

0 comments on commit ab0f60f

Please sign in to comment.