-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfraggy.anlz
executable file
·111 lines (88 loc) · 4.32 KB
/
fraggy.anlz
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#!/usr/bin/env Rscript
##------------------------------------------------------------------------------
## fraggy analyzer and visualiser
##
## @author Lars Thoms
## @date 2021-01-24
##------------------------------------------------------------------------------
library(argparser)
library(dplyr, warn.conflicts = FALSE)
library(ggplot2)
##------------------------------------------------------------------------------
## arguments parser
##------------------------------------------------------------------------------
# initialize parser
p <- arg_parser(
  "analyzer and visualizer of fragmentation distribution",
  name = "fraggy.anlz",
  hide.opts = TRUE
)

# define arguments
# `--input` and `--label` take any number of values; they are paired by
# position further down, so both must be given the same number of values.
p <- add_argument(p, "--xlim", default = 0, type = "numeric", help = "boundaries on x axis")
p <- add_argument(p, "--prefix", default = "", help = "prefix of generated output files, like current date or name of the tested drive")
p <- add_argument(p, "--output", default = "./output", nargs = 1, help = "output path of graphs and statistic tables")
p <- add_argument(p, "--input", nargs = Inf, help = "input path of CSV table generated by fraggy.agg")
p <- add_argument(p, "--label", nargs = Inf, help = "label of the current run")

# parse arguments
argv <- parse_args(p)

# A missing `--input` shows up as NA. Because the argument may carry several
# paths, is.na() returns one value per path; all() collapses that to the single
# logical `if` needs (a longer condition is an error on R >= 4.2, and earlier
# versions only looked at the first path).
if (all(is.na(argv$input))) {
  print(p)
  stop("A minimum of one input file is required")
}

# every input file needs exactly one label
if (length(argv$label) != length(argv$input)) {
  print(p)
  stop("The number of parameters of '--input' and '--label' have to be the equal")
}

# create output directory (including missing parent directories)
if (!dir.exists(argv$output)) {
  dir.create(argv$output, recursive = TRUE)
}
##------------------------------------------------------------------------------
## data reader
##------------------------------------------------------------------------------
# fetch measurement data from CSV files
# Each file named by `--input` is read and every row is tagged with the
# `--label` value at the same position. The tables are collected first and
# bound once at the end, instead of re-copying a growing `data` on every
# iteration.
data_tables <- lapply(seq_along(argv$input), function(i) {
  data_csv <- read.csv(argv$input[i])
  # repeat the label per row so a header-only CSV (zero rows) is accepted too
  data_csv$label <- rep(argv$label[i], nrow(data_csv))
  data_csv
})

# stack all runs into one long table; `label` tells the runs apart
data <- do.call(rbind, data_tables)
##------------------------------------------------------------------------------
## generate distribution graph
##------------------------------------------------------------------------------
# create lineplot
# One line per data record: x = fragments per file, y = frequency on a log10
# scale. Records are ordered in the legend as given on the command line.

# The first three records keep the fixed colours; any further record gets an
# additional colour so the manual scales never run out of values.
record_colors <- c("#000000", "#FF8B83", "#ADD8E6")
n_extra_colors <- length(argv$label) - length(record_colors)
if (n_extra_colors > 0) {
  record_colors <- c(
    record_colors,
    grDevices::hcl.colors(n_extra_colors, palette = "Dark 3")
  )
}

# `--xlim 0` (the default) means "no upper bound"; NA lets ggplot2 pick it
x_upper <- if (is.na(argv$xlim) || argv$xlim == 0) NA else argv$xlim

image <- ggplot(
  data,
  aes(
    x = fragments,
    y = amount,
    color = factor(label, levels = argv$label),
    fill = factor(label, levels = argv$label)
  )
) +
  labs(x = "fragments per file", y = "frequency", color = "data records") +
  theme(legend.position = "bottom") +
  scale_color_manual(values = record_colors) +
  scale_fill_manual(values = record_colors) +
  scale_y_log10() +
  xlim(1, x_upper) +
  # the counts are already aggregated, so they are drawn as-is (no binning);
  # na.rm silences the warning for points removed by the axis limits
  geom_freqpoly(stat = "identity", position = "identity", na.rm = TRUE)

# save image
ggsave(
  filename = file.path(argv$output, paste0(argv$prefix, "distribution.svg")),
  plot = image,
  width = 10,
  height = 5
)
##------------------------------------------------------------------------------
## generate statistical data
##------------------------------------------------------------------------------
# Per data record: number of files with exactly 1, at most 5, at most 10 and
# more than 10 fragments, plus the highest fragment count seen.
#
# All buckets are computed in a single grouped pass. A bucket without any
# matching rows contributes 0, so a record is never dropped from the table
# just because, e.g., none of its files has more than 10 fragments.
# which() skips rows whose fragment count is NA, as filter() would.
statistic <- data %>%
  filter(!is.na(label)) %>%
  group_by(label) %>%
  summarise(
    sum1 = sum(amount[which(fragments == 1)]),
    sum5 = sum(amount[which(fragments <= 5)]),
    sum10 = sum(amount[which(fragments <= 10)]),
    sum_r = sum(amount[which(fragments > 10)]),
    max = max(fragments),
    .groups = "drop"
  ) %>%
  as.data.frame()

# rows sorted by label, with plain 1..n row names
statistic <- statistic[order(statistic$label), , drop = FALSE]
rownames(statistic) <- NULL

# save statistics
colnames(statistic) <- c("label", "sum == 1", "sum <= 5", "sum <= 10", "sum > 10", "max")
write.csv(
  statistic,
  file.path(argv$output, paste0(argv$prefix, "statistics.csv")),
  row.names = FALSE,
  quote = FALSE
)

# print statistics
print(statistic)