Skip to content

Commit

Permalink
1x report file for exception (#70)
Browse files Browse the repository at this point in the history
  • Loading branch information
sadikovi authored Apr 26, 2017
1 parent ce2764e commit 433e0cf
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 6 deletions.
21 changes: 18 additions & 3 deletions src/main/scala/com/github/sadikovi/spark/rdd/NetFlowFileRDD.scala
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import java.io.IOException
import scala.collection.JavaConverters._
import scala.collection.mutable.ArrayBuffer
import scala.reflect.ClassTag
import scala.util.control.NonFatal

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, FSDataInputStream, Path}
Expand Down Expand Up @@ -109,10 +110,17 @@ private[spark] class NetFlowFileRDD[T <: SQLRow : ClassTag] (

// Prepare file stream
var stm: FSDataInputStream = fs.open(path)
val reader = NetFlowReader.prepareReader(stm, elem.bufferSize, ignoreCorruptFiles)
// If reader initialization fails we either reset the reader to null or report an error;
// checking for a valid reader (or null) ensures that we log the corrupt file
val reader = try {
NetFlowReader.prepareReader(stm, elem.bufferSize, ignoreCorruptFiles)
} catch {
case NonFatal(err) if ignoreCorruptFiles => null
case NonFatal(err) => throw new RuntimeException(s"${err.getMessage}, file=$path", err)
}
// this flag is only checked when ignoreCorruptFiles = true; otherwise initialization
// will throw an exception if the file is corrupt
if (!reader.isValid()) {
if (reader == null || !reader.isValid()) {
logWarning(s"Failed to read file $path, ignoreCorruptFiles=$ignoreCorruptFiles")
buffer = buffer ++ Iterator.empty
} else {
Expand Down Expand Up @@ -152,12 +160,19 @@ private[spark] class NetFlowFileRDD[T <: SQLRow : ClassTag] (

val rawIterator = new CloseableIterator[Array[Object]] {
private var delegate = recordBuffer.iterator().asScala
// capture the file path so it can be included in any error message
private val filepath = path

override def getNext(): Array[Object] = {
// If the delegate has traversed all elements, mark it as finished
// so that the stream can be closed
if (delegate.hasNext) {
delegate.next
try {
delegate.next
} catch {
case NonFatal(err) =>
throw new RuntimeException(s"${err.getMessage}, file=$filepath", err)
}
} else {
finished = true
null
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,8 @@ class NetFlowSuite extends SparkNetFlowTestSuite {
load(s"file:${path3}").count()
}
val msg = err.getMessage()
assert(msg.contains("java.io.IOException: " +
"Corrupt NetFlow file. Wrong magic number"))
assert(msg.contains("Corrupt NetFlow file. Wrong magic number"))
assert(msg.contains(path3))
}

test("fail to read data of corrupt file") {
Expand All @@ -169,7 +169,8 @@ class NetFlowSuite extends SparkNetFlowTestSuite {
df.select("srcip").count()
}
val msg = err.getMessage()
assert(msg.contains("java.lang.IllegalArgumentException: Unexpected EOF"))
assert(msg.contains("Unexpected EOF"))
assert(msg.contains(path4))
}

test("fail to read unsupported version 8") {
Expand Down

0 comments on commit 433e0cf

Please sign in to comment.