Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added sampling to dnscap #15

Open
wants to merge 13 commits into
base: develop
Choose a base branch
from
8 changes: 8 additions & 0 deletions src/dnscap.1.in
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
.Op Fl E Ar datetime
.Op Fl P Ar plugin.so
.Op Fl U Ar str
.Op Fl q Ar nth
.Sh DESCRIPTION
.Nm
is a network capture utility designed specifically for DNS traffic. It
Expand Down Expand Up @@ -353,6 +354,13 @@ Enable immediate mode on interfaces.
Append "and
.Ar str
" to the pcap filter.
.It Fl q Ar nth
Causes the output to be sampled after the application of all other filters.
Only every nth dns initiation will be output and responses are only output
if they correspond to one of the sampled queries. This option cannot be
used with option
.Fl s
[r] due to obvious filter reasons.
.El
.Pp
If started with no options,
Expand Down
96 changes: 92 additions & 4 deletions src/dnscap.c
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ static const char version_fmt[] = "V1.0-OARC-r%d (%s)";
#include <time.h>
#include <unistd.h>
#include <pwd.h>
#include "../uthash/uthash.h" //for the hash used in the sample module

#ifdef __linux__
extern char *strptime(const char *, const char *, struct tm *);
Expand Down Expand Up @@ -287,6 +288,19 @@ struct plugin {
};
LIST(struct plugin) plugins;

typedef struct{
UT_hash_handle hh; //makes the structure hashable using "uthash/uthash.h"
uint32_t from; //these next 5 fields make up the compound key
int sport,dport,transaction_id;
uint32_t to;
}samplePacket ;

typedef struct{
uint32_t from;
int sport,dport,transaction_id;
uint32_t to;
}sample_lookup_key;

/* Forward. */

static void setsig(int, int);
Expand Down Expand Up @@ -384,6 +398,12 @@ static unsigned long long mem_limit = (unsigned) MEM_MAX; // process memory li
static int mem_limit_set = 1; // Should be configurable
const char DROPTOUSER[] = "nobody";
static pcap_thread_t pcap_thread = PCAP_THREAD_T_INIT;
static int sample = FALSE;
static unsigned sampleAmount;
static unsigned querycount;
static samplePacket *allSampleQueries = NULL;
static int chooseSidesResponse = FALSE;
static unsigned keylen;

/* Public. */

Expand Down Expand Up @@ -694,7 +714,8 @@ help_1(void) {
"\t[-w <base> [-W <suffix>] [-k <cmd>]] [-t <lim>] [-c <lim>] [-C <lim>]\n"
"\t[-x <pat>]+ [-X <pat>]+\n"
"\t[-B <datetime>] [-E <datetime>]\n"
"\t[-P plugin.so] [-U <str>]\n",
"\t[-P plugin.so] [-U <str>]\n"
"\t[-q <nth>]\n",
ProgramName);
}

Expand Down Expand Up @@ -755,6 +776,8 @@ help_2(void) {
"\t-E <datetime> end collecting at this date and time\n"
"\t-M set monitor mode on interfaces\n"
"\t-D set immediate mode on interfaces\n"
"\t-q <nth> output only every nth DNS query and only output responses\n \
if they correspond to one of the sampled queries\n"
);
}

Expand Down Expand Up @@ -782,7 +805,7 @@ parse_args(int argc, char *argv[]) {
INIT_LIST(myregexes);
INIT_LIST(plugins);
while ((ch = getopt(argc, argv,
"a:bc:de:fgh:i:k:l:m:pr:s:t:u:w:x:"
"a:bc:de:fgh:i:k:l:m:pq:r:s:t:u:w:x:"
#ifdef USE_SECCOMP
"y"
#endif
Expand Down Expand Up @@ -889,6 +912,7 @@ parse_args(int argc, char *argv[]) {
msg_wanted = u;
break;
case 's':
chooseSidesResponse = TRUE;
u = 0;
for (p = optarg; *p; p++)
switch (*p) {
Expand All @@ -898,6 +922,13 @@ parse_args(int argc, char *argv[]) {
}
dir_wanted = u;
break;
case 'q':
sample = TRUE;
sampleAmount = atoi(optarg);
if(sampleAmount == 0)
usage("-q takes only unsigned integer values != 0");
querycount = 0;
break;
case 'h':
u = 0;
for (p = optarg; *p; p++)
Expand Down Expand Up @@ -1099,6 +1130,11 @@ parse_args(int argc, char *argv[]) {
usage("the -L and -l options are mutually exclusive");
if (background && (dumptrace || preso))
usage("the -b option is incompatible with -d and -g");
if (sample && chooseSidesResponse)
{
if(((dir_wanted & DIR_RESPONSE) != 0) && ((dir_wanted & DIR_INITIATE) == 0))
usage("the -q option is incompatible with -s r");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On second glance I do not see the reason to have chooseSidesResponse, dir_wanted is set to both direction at start and you really only need to check if DIR_RESPONSE is not set.

}
if (dumptrace >= 1) {
endpoint_ptr ep;
const char *sep;
Expand Down Expand Up @@ -2339,8 +2375,60 @@ network_pkt(const char *descr, my_bpftimeval ts, unsigned pf,
abort();
}
}
output(descr, from, to, proto, flags, sport, dport, ts,
pkt_copy, olen, dnspkt, dnslen);
/*Sample Module*/
if (sample == TRUE)
{
ns_msg dnsmsgSample;
ns_initparse(dnspkt,dnslen,&dnsmsgSample);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is part of libbind and dnscap can be compiled without libbind.

Copy link
Author

@chegger chegger Sep 15, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You mean ns_msg and ns_initparse? Is there another way to retrieve the transaction id of a query? I've seen ns_initparse being used in the output functions in dump_dns.c, so I assumed it was safe to use.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Look at the top #if HAVE_NS_INITPARSE && ..., you will need to disable all the sampling code if libbind is missing or create your own functions for parsing the required information.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For now I'll disable the sampling code if libbind is missing. In future i might try to write my own parsing functions

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The sampling code is still compiled even if libbind does not exist, checking if sample is true or not does not remove the dependency of the functions.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes but in parse_args() sample is only set to true if libbind does exist, by checking #if HAVE_NS_INITPARSE && HAVE_NS_PARSERR && HAVE_NS_SPRINTRR . Is this checkback not suitable?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You do know the difference between if() and #if right?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do and I just realized where my mistake in thinking was at. I will work on it right away.

samplePacket *currentQuery;

uint32_t *fromBuffer = (uint32_t*)&from.u;
uint32_t *toBuffer = (uint32_t*)&to.u;

keylen = offsetof(samplePacket,to) //keylen is used to define which fields of the hash structure are added
+ sizeof(*toBuffer) //as a compound key. Here, the key is composed of all fields between (and including)
- offsetof(samplePacket,from); //samplePacket->to and samplePacket->from (from, sport, dport, transaction_id, to)

if(dns.qr == 0)
{
querycount++;
if(querycount % sampleAmount == 0)
{
currentQuery = calloc(1,sizeof(*currentQuery));
assert(currentQuery != NULL);
currentQuery->from = *fromBuffer;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What are meant to be in samplePacket->from ?
From the struct definition from is just an unsigned and this would essentially put the first unsigned from the fromBuffer into currentQuery. That would be the first part of iaddr.u and that is only the IPv4 address (or a very small part of it) which means this does not work for IPv6.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

samplePacket->from should contain IP addresses (both IPv4 and v6 would be optimal).

On my system unsigned is 32 bits (which is the size of IPv4 adresses), so it should be able to store a full IPv4. I'll change it to uint_32 so it also works on all systems.
As to IPv6, I did not find out how to handle such big numbers properly. I thought it would be better to take a small portion of the IPv6 addresses than not to work with IPv6 at all.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry you need to find a way to support both v4 and v6 otherwise I will reject the pull request. I do not know how exactly uthash works but maybe you can use the either iaddr which from is.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I initially used from but I came to the conclusion that it doesn't work properly. I'll give it another try though. If I don't get it to work like this, I'll look for another way to support IPv6 addresses.

In case I can't get it to work with IPv6 at all, I'll inform you, so you can reject this pull request.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Look at plugins/rssm/rssm.c for an example of how to make a hash table with both v4 and v6 addresses ("my_hashtbl").

dnscap_common.h defines 'iaddr' which you should use to store v4/v6 addrs.

rssm.c (and other plugin .c files) have iaddr_hash().

At some point it would make sense to move hashtbl.c and supporting functions out of plugins and into src.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Found a way that will work perfectly fine. Ill commit it on Monday!

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry but you have implemented this in a very dangerous way and the use of AMOUNT_32BIT_IN_128BIT is unneeded. You are copying in and out of different structures without any check of structure sizes and seeing that the hash can take binary blobs as keys you should be able to put two iaddr structures within the hash entry for both to and from v4/v6 addresses.

currentQuery->to = *toBuffer;
currentQuery->sport = sport;
currentQuery->dport = dport;
currentQuery->transaction_id = ns_msg_id(dnsmsgSample);

HASH_ADD(hh,allSampleQueries,from,keylen,currentQuery);
output(descr,from,to,proto,flags,sport,dport,ts,pkt_copy,olen,dnspkt,dnslen);
}
}
else
{
sample_lookup_key *lookup_key = (sample_lookup_key*)calloc(1,sizeof(*lookup_key));
assert(lookup_key != NULL);
lookup_key->from = *toBuffer;
lookup_key->to = *fromBuffer;
lookup_key->dport = sport;
lookup_key->sport = dport;
lookup_key->transaction_id = ns_msg_id(dnsmsgSample);

HASH_FIND(hh,allSampleQueries,&lookup_key->from,keylen,currentQuery);
if(currentQuery)
{
HASH_DEL(allSampleQueries,currentQuery);
free(currentQuery);
output(descr,from,to,proto,flags,sport,dport,ts,pkt_copy,olen,dnspkt,dnslen);
}
free(lookup_key);
}
}else
{
output(descr,from,to,proto,flags,sport,dport,ts,pkt_copy,olen,dnspkt,dnslen);
}
}

/*
Expand Down
Loading