has human check #3284

Merged: 7 commits, May 13, 2023
2 changes: 1 addition & 1 deletion qiita_db/analysis.py
@@ -145,7 +145,7 @@ def create(cls, owner, name, description, from_default=False,
If the duplicated sample ids in the selected studies should be
merged or prepended with the artifact ids. False (default) prepends
the artifact id
- categories : set of str, optional
+ categories : list of str, optional
Member Author:

While working on something else, I realized that this documentation was wrong.
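For illustration, a minimal usage sketch of the corrected signature; the owner, name, and categories below are hypothetical values, not taken from this PR:

```python
# Hypothetical usage sketch; assumes the Analysis.create signature shown in
# this diff. The owner, name, and category values are made-up examples.
import qiita_db as qdb

analysis = qdb.analysis.Analysis.create(
    qdb.user.User('demo@example.com'),         # owner (hypothetical account)
    'example meta-analysis',                   # name
    'illustrates the corrected docstring',     # description
    categories=['env_package', 'host_taxid'])  # a list of str, not a set
```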

If not None, use _only_ these categories for the metaanalysis

Returns
22 changes: 22 additions & 0 deletions qiita_db/artifact.py
@@ -43,6 +43,7 @@ class Artifact(qdb.base.QiitaObject):
prep_template
ebi_run_accession
study
has_human

Methods
-------
@@ -1550,6 +1551,27 @@ def being_deleted_by(self):
res = qdb.sql_connection.TRN.execute_fetchindex()
return qdb.processing_job.ProcessingJob(res[0][0]) if res else None

@property
def has_human(self):
    has_human = False
    if self.artifact_type == 'per_sample_FASTQ':
        st = self.study.sample_template
        if 'env_package' in st.categories:
Contributor:

I worry about whether this is stringent enough. Data that users upload, or that we import from EBI, is not assured to have the env_package variable. If the study lacks this variable then we may accidentally be exposing human-associated data. Are there any human-associated shotgun samples in Qiita right now that this doesn't catch?

One thing we could do is test multiple variables, such as also including host_taxid, host_scientific_name, and host_common_name. However, I think it would be helpful to take a data-driven perspective here and make sure that whatever we choose captures all the human shotgun data we currently index.
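A minimal sketch of that multi-column idea (the constant and helper here are hypothetical, not something this PR adds):

```python
# Hypothetical sketch of the multi-column check suggested above; not the
# PR's implementation. Assumes `st` is a SampleTemplate whose `categories`
# lists its metadata columns, as used elsewhere in this diff.
HOST_COLUMNS = ('env_package', 'host_taxid',
                'host_scientific_name', 'host_common_name')

def host_columns_present(st):
    """Return the host-related columns this sample template defines."""
    return [c for c in HOST_COLUMNS if c in st.categories]
```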

Member Author:

Good question! As background: around 4 years ago we introduced restrictions on changing artifacts from sandbox to private/public, which means that every study that has changed level since then should have env_package. That doesn't guarantee it for older studies; however, all Metagenomic data has been added in more recent years.

Anyway, to make this more data-driven: we currently have Counter({'public': 695, 'private': 153, 'sandbox': 3254}), and checking study status against the existence of env_package & host_taxid we have:
Counter({'public host_taxid': 38,
'public env_package': 638,
'private host_taxid': 3,
'sandbox host_taxid': 150,
'sandbox env_package': 889,
'private env_package': 130})

What do you think?
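For reference, a rough sketch of how tallies like these could be produced; it assumes Study.iter(), study.status, and sample_template behave as used elsewhere in qiita_db:

```python
# Hypothetical audit sketch reconstructing the tallies above; assumes
# qdb.study.Study.iter() yields every study and that `status` and
# `sample_template` behave as used elsewhere in qiita_db.
from collections import Counter

import qiita_db as qdb

counts = Counter()
for study in qdb.study.Study.iter():
    st = study.sample_template
    if st is None:
        continue
    for column in ('env_package', 'host_taxid'):
        if column in st.categories:
            counts[f'{study.status} {column}'] += 1
print(counts)
```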

Contributor:

Are any of the public studies which lack env_package ones we should be concerned about? Are there other variables those studies have which could be used?

Member Author:

IMO yes, but I think we should add that column to those studies rather than trying to use another column. I'll send an email to the qiita.admins with the list of studies.

            sql = f"""SELECT DISTINCT sample_values->>'env_package'
                FROM qiita.sample_{st.id} WHERE sample_id in (
                    SELECT sample_id from qiita.preparation_artifact
                    LEFT JOIN qiita.prep_template_sample USING (
                        prep_template_id)
                    WHERE artifact_id = {self.id})"""
            with qdb.sql_connection.TRN:
                qdb.sql_connection.TRN.add(sql)
                for v in qdb.sql_connection.TRN.execute_fetchflatten():
                    if v.startswith('human-'):
                        has_human = True
                        break

    return has_human

def jobs(self, cmd=None, status=None, show_hidden=False):
"""Jobs that used this artifact as input

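In plain terms, the SQL in has_human pulls the distinct env_package values for exactly the samples attached to this artifact's prep and flags any value starting with 'human-'. A rough Python rendering for readers (the PR itself uses the SQL; that get_category returns a {sample_id: value} dict and that iterating a prep template yields its sample ids are assumptions):

```python
# Rough, hypothetical Python rendering of the logic in has_human; the PR
# itself queries the database directly. Assumes get_category returns a
# {sample_id: value} dict and that iterating a prep template yields its
# sample ids.
def has_human_py(artifact):
    if artifact.artifact_type != 'per_sample_FASTQ':
        return False
    st = artifact.study.sample_template
    if 'env_package' not in st.categories:
        return False
    env_packages = st.get_category('env_package')
    prep_samples = {s for pt in artifact.prep_templates for s in pt}
    return any(str(env_packages.get(s, '')).startswith('human-')
               for s in prep_samples)
```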
42 changes: 42 additions & 0 deletions qiita_db/test/test_artifact.py
@@ -677,6 +677,28 @@ def setUp(self):

self._clean_up_files = [self.fp1, self.fp2, self.fp3, self.fp4]

# per_sample_FASTQ Metagenomic example

self.prep_template_per_sample_fastq = \
    qdb.metadata_template.prep_template.PrepTemplate.create(
        metadata, qdb.study.Study(1), "Metagenomic")
fd, self.fwd = mkstemp(prefix='SKB8.640193', suffix='_R1.fastq')
close(fd)
with open(self.fwd, 'w') as f:
    f.write("@HWI-ST753:189:D1385ACXX:1:1101:1214:1906 1:N:0:\n"
            "NACGTAGGGTGCAAGCGTTGTCCGGAATNA\n"
            "+\n"
            "#1=DDFFFHHHHHJJJJJJJJJJJJGII#0\n")
fd, self.rev = mkstemp(prefix='SKB8.640193', suffix='_R2.fastq')
close(fd)
with open(self.rev, 'w') as f:
    f.write("@HWI-ST753:189:D1385ACXX:1:1101:1214:1906 1:N:0:\n"
            "NACGTAGGGTGCAAGCGTTGTCCGGAATNA\n"
            "+\n"
            "#1=DDFFFHHHHHJJJJJJJJJJJJGII#0\n")

self._clean_up_files.extend([self.fwd, self.rev])

def tearDown(self):
for f in self._clean_up_files:
if exists(f):
@@ -1364,6 +1386,26 @@ def test_descendants_with_jobs_one_element(self):
exp = [('artifact', artifact)]
self.assertCountEqual(obs, exp)

def test_has_human(self):
    # testing a FASTQ artifact (1), should be False
    self.assertFalse(qdb.artifact.Artifact(1).has_human)

    # create a per_sample_FASTQ
    artifact = qdb.artifact.Artifact.create(
        [(self.fwd, 1), (self.rev, 2)], "per_sample_FASTQ",
        prep_template=self.prep_template_per_sample_fastq)

    # this should be False as there are no human samples
    self.assertFalse(artifact.has_human)

    # let's make it True by making the sample human-*
    df = pd.DataFrame.from_dict(
        {'1.SKB8.640193': {'env_package': 'human-oral'}},
        orient='index', dtype=str)
    artifact.study.sample_template.update(df)

    self.assertTrue(artifact.has_human)


@qiita_test_checker()
class ArtifactArchiveTests(TestCase):
11 changes: 9 additions & 2 deletions qiita_pet/handlers/artifact_handlers/base_handlers.py
@@ -43,8 +43,15 @@ def check_artifact_access(user, artifact):
"""
if user.level in ('admin', 'wet-lab admin'):
return
-    if artifact.visibility != 'public':
-        study = artifact.study
+    study = artifact.study
+    if artifact.visibility == 'public':
+        # if it's public we need to confirm that this artifact has no possible
+        # human sequences
+        if artifact.has_human and not study.has_access(user, True):
+            raise QiitaHTTPError(403, "Access denied to artifact %s"
+                                 % artifact.id)
+    else:
analysis = artifact.analysis
if study:
if not study.has_access(user):
10 changes: 7 additions & 3 deletions qiita_pet/handlers/download.py
@@ -235,7 +235,7 @@ def get(self, study_id):
(self.current_user in study.shared_with)))

for a in study.artifacts(artifact_type='BIOM'):
- if full_access or a.visibility == 'public':
+ if full_access or (a.visibility == 'public' and not a.has_human):
to_download.extend(self._list_artifact_files_nginx(a))

self._write_nginx_file_list(to_download)
@@ -289,7 +289,7 @@ def get(self, study_id):
to_download = []
for a in study.artifacts():
if not a.parents:
- if not is_owner and a.visibility != 'public':
+ if not is_owner and (a.visibility != 'public' or a.has_human):
continue
to_download.extend(self._list_artifact_files_nginx(a))

@@ -460,7 +460,7 @@ def get(self):
artifacts = study.artifacts(
dtype=data_type, artifact_type='BIOM')
for a in artifacts:
- if a.visibility != 'public':
+ if a.visibility != 'public' or a.has_human:
continue
to_download.extend(self._list_artifact_files_nginx(a))

@@ -498,6 +498,10 @@ def get(self):
raise HTTPError(404, reason='Artifact is not public. If '
'this is a mistake contact: '
'[email protected]')
+ elif artifact.has_human:
+     raise HTTPError(404, reason='Artifact has possible human '
+                     'sequences. If this is a mistake contact: '
+                     '[email protected]')
else:
to_download = self._list_artifact_files_nginx(artifact)
if not to_download:
2 changes: 1 addition & 1 deletion setup.py
@@ -102,7 +102,7 @@
scripts=glob('scripts/*'),
# making sure that numpy is installed before biom
setup_requires=['numpy', 'cython'],
- install_requires=['psycopg2', 'click', 'bcrypt', 'pandas',
+ install_requires=['psycopg2', 'click', 'bcrypt', 'pandas<2.0',
Contributor:

Why the pin?

Member Author:

Because pandas>=2.0 breaks the tests. I saw that this run was failing: https://github.com/qiita-spots/qiita/actions/runs/4940536571/jobs/8832562245, so I installed the latest pandas version on my local computer and got a lot of errors; then I installed 'pandas<2.0' and everything worked fine again.
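For context, pandas 2.0 removed several long-deprecated APIs; a common breakage pattern looks like the sketch below (whether this exact call is what fails in Qiita is an assumption):

```python
import pandas as pd

df = pd.DataFrame({'a': [1]})
row = pd.DataFrame({'a': [2]})

# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0:
# df = df.append(row)  # raises AttributeError under pandas >= 2.0
df = pd.concat([df, row], ignore_index=True)  # works before and after 2.0
```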

Contributor:

Okay, makes sense to punt. Some of the types of changes needed are outlined in scikit-bio/scikit-bio#1851.

Member Author:

Added #3289 so we don't forget.

'biom-format', 'tornado<6.0', 'toredis', 'redis',
'scp', 'pyparsing', 'h5py', 'natsort', 'nose', 'pep8',
'networkx', 'humanize', 'wtforms<3.0.0', 'nltk',