From e8df8ab000aab540654d24b23ca911c1ae2a4663 Mon Sep 17 00:00:00 2001
From: Gayathri <142381512+gaya3-zipstack@users.noreply.github.com>
Date: Thu, 29 Feb 2024 15:14:44 +0530
Subject: [PATCH] Fix file upload security loopholes (#14)

* Add file upload validation

* Code formatting

* Code formatting

* Change values to constants
---
Note: backend/utils/FileValidator.py below was revised after this patch was
exported (rewind after MIME sniffing, message fixes); its index blob id has
not been regenerated.

 backend/file_management/constants.py  |   3 +
 backend/file_management/serializer.py |  30 ++++++-
 backend/utils/FileValidator.py        | 108 ++++++++++++++++++++++++++
 3 files changed, 139 insertions(+), 2 deletions(-)
 create mode 100644 backend/utils/FileValidator.py

diff --git a/backend/file_management/constants.py b/backend/file_management/constants.py
index 82c16e045..e12a31d9b 100644
--- a/backend/file_management/constants.py
+++ b/backend/file_management/constants.py
@@ -3,3 +3,6 @@ class FileInformationKey:
     FILE_TYPE = "type"
     FILE_LAST_MODIFIED = "LastModified"
     FILE_SIZE = "size"
+    FILE_UPLOAD_MAX_SIZE = 100 * 1024 * 1024
+    FILE_UPLOAD_ALLOWED_EXTENSIONS = ['pdf']
+    FILE_UPLOAD_ALLOWED_MIMETYPES = ['application/pdf']
\ No newline at end of file
diff --git a/backend/file_management/serializer.py b/backend/file_management/serializer.py
index 5efa0a665..dfd220cfa 100644
--- a/backend/file_management/serializer.py
+++ b/backend/file_management/serializer.py
@@ -1,5 +1,8 @@
 from rest_framework import serializers
 
+from file_management.constants import FileInformationKey
+from utils.FileValidator import FileValidator
+
 
 class FileInfoSerializer(serializers.Serializer):
     name = serializers.CharField()
@@ -15,13 +18,36 @@ class FileListRequestSerializer(serializers.Serializer):
 
 
 class FileUploadSerializer(serializers.Serializer):
-    file = serializers.ListField(child=serializers.FileField(), required=True)
+    file = serializers.ListField(
+        child=serializers.FileField(),
+        required=True,
+        validators=[
+            FileValidator(
+                allowed_extensions=FileInformationKey.FILE_UPLOAD_ALLOWED_EXTENSIONS,
+                allowed_mimetypes=FileInformationKey.FILE_UPLOAD_ALLOWED_MIMETYPES,
+                min_size=0,
+                max_size=FileInformationKey.FILE_UPLOAD_MAX_SIZE,
+            )
+        ],
+    )
+    # FileExtensionValidator(allowed_extensions=['pdf'])
     connector_id = serializers.UUIDField()
     path = serializers.CharField()
 
 
 class FileUploadIdeSerializer(serializers.Serializer):
-    file = serializers.ListField(child=serializers.FileField(), required=True)
+    file = serializers.ListField(
+        child=serializers.FileField(),
+        required=True,
+        validators=[
+            FileValidator(
+                allowed_extensions=FileInformationKey.FILE_UPLOAD_ALLOWED_EXTENSIONS,
+                allowed_mimetypes=FileInformationKey.FILE_UPLOAD_ALLOWED_MIMETYPES,
+                min_size=0,
+                max_size=FileInformationKey.FILE_UPLOAD_MAX_SIZE,
+            )
+        ],
+    )
 
 
 class FileInfoIdeSerializer(serializers.Serializer):
diff --git a/backend/utils/FileValidator.py b/backend/utils/FileValidator.py
new file mode 100644
index 000000000..62bbf22a5
--- /dev/null
+++ b/backend/utils/FileValidator.py
@@ -0,0 +1,108 @@
+import magic
+from os.path import splitext
+
+from django.core.exceptions import ValidationError
+from django.template.defaultfilters import filesizeformat
+from django.utils.translation import gettext_lazy as _
+from django.utils.translation import ngettext_lazy
+
+
+class FileValidator(object):
+    """
+    Validator for a list of uploaded files, checking the extension,
+    mimetype and size of every file.
+
+    Initialization parameters (keyword arguments):
+        allowed_extensions: iterable with allowed file extensions,
+            lower-case and without the leading dot
+            ie. ('txt', 'doc')
+        allowed_mimetypes: iterable with allowed mimetypes
+            ie. ('image/png', )
+        min_size: minimum number of bytes allowed
+            ie. 100
+        max_size: maximum number of bytes allowed
+            ie. 24*1024*1024 for 24 MB
+
+    A falsy allowed_extensions, allowed_mimetypes or max_size disables
+    the corresponding check.
+    """
+
+    extension_message = _("Extension '%(extension)s' not allowed. "
+                          "Allowed extensions are: '%(allowed_extensions)s'.")
+    mime_message = _("MIME type '%(mimetype)s' is not valid. "
+                     "Allowed types are: %(allowed_mimetypes)s.")
+    min_size_message = _('The current file size is %(size)s, which is too '
+                         'small. The minimum file size is %(allowed_size)s.')
+    max_size_message = _('The current file size is %(size)s, which is too '
+                         'large. The maximum file size is %(allowed_size)s.')
+
+    # Number of leading bytes read from each file to detect its MIME type.
+    mime_sniff_bytes = 2048
+
+    def __init__(self, *args, **kwargs):
+        """
+        Store the configured limits; any other argument is ignored.
+        """
+        self.allowed_extensions = kwargs.pop('allowed_extensions', None)
+        self.allowed_mimetypes = kwargs.pop('allowed_mimetypes', None)
+        self.min_size = kwargs.pop('min_size', 0)
+        self.max_size = kwargs.pop('max_size', None)
+
+    def __call__(self, value):
+        """
+        Check the extension, content type and file size for each file.
+
+        value: iterable of file objects providing name, read(), seek()
+            and len().
+        Raises ValidationError for the first file that fails a check.
+        """
+        for file in value:
+            self._validate_file(file)
+
+    def _validate_file(self, file):
+        """
+        Run the extension, content type and size checks on one file.
+        """
+        # Check the extension
+        ext = splitext(file.name)[1][1:].lower()
+        if self.allowed_extensions and ext not in self.allowed_extensions:
+            raise ValidationError(self.extension_message % {
+                'extension': ext,
+                'allowed_extensions': ', '.join(self.allowed_extensions),
+            })
+
+        # Check the content type; the file is only read when a MIME
+        # allow-list is configured.
+        if self.allowed_mimetypes:
+            mimetype = self._sniff_mimetype(file)
+            if mimetype not in self.allowed_mimetypes:
+                raise ValidationError(self.mime_message % {
+                    'mimetype': mimetype,
+                    'allowed_mimetypes': ', '.join(self.allowed_mimetypes),
+                })
+
+        # Check the file size
+        filesize = len(file)
+        if self.max_size and filesize > self.max_size:
+            raise ValidationError(self.max_size_message % {
+                'size': filesizeformat(filesize),
+                'allowed_size': filesizeformat(self.max_size),
+            })
+        if filesize < self.min_size:
+            raise ValidationError(self.min_size_message % {
+                'size': filesizeformat(filesize),
+                'allowed_size': filesizeformat(self.min_size),
+            })
+
+    def _sniff_mimetype(self, file):
+        """
+        Return the MIME type detected from the first bytes of the file.
+        """
+        file.seek(0)
+        try:
+            header = file.read(self.mime_sniff_bytes)
+        finally:
+            # Rewind so that whoever stores the upload afterwards reads
+            # it from the start, not from after the sniffed bytes.
+            file.seek(0)
+        return magic.from_buffer(header, mime=True)