diff --git a/xena_gdc_etl/gdc2xena.py b/xena_gdc_etl/gdc2xena.py
index 8cb76aa..708b7cc 100644
--- a/xena_gdc_etl/gdc2xena.py
+++ b/xena_gdc_etl/gdc2xena.py
@@ -29,7 +29,7 @@
 import time
 import shutil
 
-from .xena_dataset import GDCOmicset, GDCPhenoset, GDCSurvivalset
+from .xena_dataset import GDCOmicset, GDCPhenoset, GDCSurvivalset, TCGAPhenoset
 
 
 def gdc2xena(root_dir, projects, xena_dtypes, delete_raw_data=False):
@@ -86,7 +86,7 @@ def gdc2xena(root_dir, projects, xena_dtypes, delete_raw_data=False):
                 if project.startswith('TARGET'):
                     dataset = GDCPhenoset(project, 'clinical', root_dir)
             elif dtype == 'GDC_phenotype':
-                dataset = GDCPhenoset(project, 'GDC_phenotype', root_dir)
+                dataset = TCGAPhenoset(project, root_dir)
             else:
                 dataset = GDCOmicset(project, dtype, root_dir)
             try:
diff --git a/xena_gdc_etl/xena_dataset.py b/xena_gdc_etl/xena_dataset.py
index 79bf785..976558a 100644
--- a/xena_gdc_etl/xena_dataset.py
+++ b/xena_gdc_etl/xena_dataset.py
@@ -1864,6 +1864,419 @@ def transform(self):
         return self
 
 
+class TCGAPhenoset(XenaDataset):
+    r"""TCGAPhenoset is derived from the ``XenaDataset`` class and represents
+    a Xena matrix whose data is phenotype data of TCGA projects.
+
+    This class provides a set of default configurations for downloading and
+    transforming phenotype data of TCGA projects, as well as generating
+    associated metadata for the transformed Xena matrix. These default
+    configurations are stored as private constants, and they can be checked
+    and/or changed through the following attributes: ``gdc_release``,
+    ``gdc_filter``, ``download_map``, ``raws2matrix``, ``metadata_template``,
+    and ``metadata_vars``.
+
+    Attributes:
+        projects (str or list): One (string) or a list of GDC's
+            "cases.project.project_id". All corresponding projects will be
+            included in this dataset.
+        gdc_release (str): URL to the data release note for the dataset. It
+            will be used by the ``metadata`` method when making the metadata
+            for this dataset. It is highly recommended that this attribute is
+            set explicitly by the user so that it is guaranteed to match the
+            data (raw data) underlying this dataset. If it is not available,
+            the most recent data release will be queried and used.
+        gdc_filter (dict): A filter for querying GDC data underlying this
+            dataset. Each item of this dict is meant to be an "in" operation,
+            with its key being one GDC API available field and its value being
+            a string or a list of strings. It can be automatically derived
+            from ``projects`` and ``xena_dtype`` if it is not assigned
+            explicitly by the user when being used. Please check `GDC API
+            documentation
+            <https://docs.gdc.cancer.gov/API/Users_Guide/Search_and_Retrieval/#filters-specifying-the-query>`_
+            for details.
+        download_map (dict): A dict with the key being a URL for one raw data
+            to be downloaded and the value being a path for saving downloaded
+            raw data. If it hasn't been assigned explicitly by the user when
+            being used, it can be automatically generated by querying through
+            GDC API according to ``gdc_filter`` which are based on
+            ``projects`` and ``xena_dtype``. Filename of data files, by
+            default, will adopt a pattern of
+            "<data_category>.<GDC file UUID>.<file extension>".
+
+            It is worth noting the "<data_category>" prefix can be useful or
+            even necessary for ``transform`` method to apply correct
+            transformation to the file. "<data_category>" is closely related
+            to the format of the file.
+        metadata_template (jinja2.environment.Template or str): A Jinja2
+            template for rendering metadata of this dataset. When setting this
+            attribute with a string, it will be taken as a path to the
+            template file and the corresponding template will be retrieved and
+            assigned to this attribute. Defaults, if needed, can be mapped
+            from ``xena_dtype``.
+        metadata_vars (dict): A dict of variables which will be used (by \*\*
+            unpacking) when rendering the ``metadata_template``. Defaults, if
+            needed, can be derived from corresponding matrix and ``projects``
+            and ``xena_dtype`` properties.
+    """
+
+    # To resolve overlap between raw data and API data, remove columns
+    # according to the following lists.
+    _API_DROPS = [
+        'id',
+        'case_id',
+        'state',
+        'created_datetime',
+        'updated_datetime',
+        'demographic_id.demographic',
+        'submitter_id.demographic',
+        'state.demographic',
+        'created_datetime.demographic',
+        'updated_datetime.demographic',
+        'diagnosis_id.diagnoses',
+        'submitter_id.diagnoses',
+        'state.diagnoses',
+        'created_datetime.diagnoses',
+        'updated_datetime.diagnoses',
+        'treatment_id.treatments.diagnoses',
+        'submitter_id.treatments.diagnoses',
+        'state.treatments.diagnoses',
+        'created_datetime.treatments.diagnoses',
+        'updated_datetime.treatments.diagnoses',
+        'exposure_id.exposures',
+        'submitter_id.exposures',
+        'state.exposures',
+        'created_datetime.exposures',
+        'updated_datetime.exposures',
+        'pathology_report_uuid.samples',
+        'state.project',
+        'released.project',
+        'sample_id.samples',
+        'created_datetime.samples',
+        'updated_datetime.samples',
+        'tissue_source_site_id.tissue_source_site',
+    ]
+    _RAW_DROPS = [
+        'alcohol_history_documented',
+        'bcr_patient_barcode',
+        'bcr_patient_uuid',
+        'bcr_sample_uuid',
+        'composition',
+        'current_weight',
+        'days_to_birth',
+        'days_to_collection',
+        'days_to_death',
+        'days_to_last_followup',
+        'days_to_sample_procurement',
+        'ethnicity',
+        'freezing_method',
+        'gender',
+        'height',
+        'icd_10',
+        'icd_o_3_histology',
+        'icd_o_3_site',
+        'initial_weight',
+        'intermediate_dimension',
+        'is_ffpe',
+        'longest_dimension',
+        'oct_embedded',
+        'pathologic_stage',
+        'pathology_report_uuid',
+        'preservation_method',
+        'primary_diagnosis',
+        'race',
+        'sample_type',
+        'sample_type_id',
+        'shortest_dimension',
+        'state',
+        'time_between_clamping_and_freezing',
+        'time_between_excision_and_freezing',
+        'tissue_type',
+        'tumor_descriptor',
+        'tumor_tissue_site',
+        'vital_status',
+    ]
+
+    @property
+    def gdc_release(self):
+        """URL of the GDC data release note for this dataset's metadata."""
+        try:
+            return self.__gdc_release
+        except AttributeError:
+            # Nothing assigned yet: turn the release title reported by the
+            # GDC "status" endpoint into an anchor on ``GDC_RELEASE_URL``.
+            status = gdc.search('status', typ='json')
+            pattern = r'(Data Release [^\s]+)\s'
+            title = re.match(pattern, status['data_release']).group(1)
+            anchor = title.replace(' ', '-').replace('.', '').lower()
+            self.__gdc_release = GDC_RELEASE_URL + '#' + anchor
+            return self.__gdc_release
+
+    @gdc_release.setter
+    def gdc_release(self, url):
+        """Assign the release note URL explicitly."""
+        self.__gdc_release = url
+
+    @property
+    def gdc_filter(self):  # GDC API "in" filter; defaults when unset/empty
+        try:
+            unset = not self.__gdc_filter
+        except AttributeError:
+            unset = True
+        if unset:  # plain test; an ``assert`` vanishes under ``python -O``
+            self.__gdc_filter = {
+                'access': 'open',
+                'cases.project.project_id': self.projects,
+                'data_category': 'Clinical',
+                'data_format': 'BCR XML',
+            }
+        return self.__gdc_filter
+
+    @gdc_filter.setter
+    def gdc_filter(self, filter_dict):
+        self.__gdc_filter = filter_dict
+
+    @XenaDataset.download_map.getter
+    def download_map(self):
+        """Map each GDC download URL to the local path for its raw file."""
+        try:
+            cached = self._download_map
+        except AttributeError:
+            cached = None
+        # Plain truth test: an ``assert`` would vanish under ``python -O``.
+        if cached:
+            return cached
+        # Nothing usable cached yet: search GDC files with ``gdc_filter``.
+        fields = ['file_id', 'file_name', 'data_category']
+        try:
+            print('Searching for raw clinical data ...', end='')
+            file_df = gdc.search(
+                'files', in_filter=self.gdc_filter, fields=fields
+            )
+        except Exception:
+            file_dict = {}
+        else:
+            file_df.set_index('file_id', drop=False, inplace=True)
+            file_dict = (
+                file_df['data_category'].astype(str)
+                + '.'
+                + file_df['file_id'].astype(str)
+                + '.'
+                + file_df['file_name'].apply(gdc.get_ext)
+            ).to_dict()
+        if not file_dict:
+            # ``_XENA_GDC_DTYPE`` is not defined by this class, so the data
+            # category is named literally instead of being looked up.
+            msg = '\rNo clinical data found for project {}.'
+            print(msg.format(str(self.projects)))
+            return file_dict
+        self._download_map = {
+            '{}/data/{}'.format(gdc.GDC_API_BASE, uuid): os.path.join(
+                self.raw_data_dir, name
+            )
+            for uuid, name in file_dict.items()
+        }
+        msg = '\r{} files found for clinical data of {}.'
+        print(msg.format(len(self._download_map), self.projects))
+        return self._download_map
+
+    @property
+    def metadata_vars(self):
+        """Variables used when rendering ``metadata_template``."""
+        try:
+            current = self.__metadata_vars
+        except AttributeError:
+            current = None
+        # Plain test: an ``assert`` would vanish under ``python -O``.
+        if current and isinstance(current, dict):
+            return current
+        # Defaults come from ``projects``, the saved matrix's modification
+        # time and ``gdc_release``. A bare project string is used as is;
+        # joining it would split it into single characters.
+        ids = self.projects
+        projects = ids if isinstance(ids, str) else ','.join(ids)
+        mtime = time.gmtime(os.path.getmtime(self.matrix))
+        self.__metadata_vars = {
+            'project_id': projects,
+            'date': time.strftime("%m-%d-%Y", mtime),
+            'gdc_release': self.gdc_release,
+            'xena_cohort': GDC_XENA_COHORT.get(projects, 'GDC ' + projects),
+        }
+        return self.__metadata_vars
+
+    @metadata_vars.setter
+    def metadata_vars(self, variables):
+        self.__metadata_vars = variables
+
+    def __init__(
+        self,
+        projects,
+        root_dir='.',
+        raw_data_dir=None,
+        matrix_dir=None,
+    ):
+        """Initialize the dataset; ``None`` directories are left unset."""
+        self.projects = projects
+        self.xena_dtype = 'phenotype'
+        self.root_dir = root_dir
+        if raw_data_dir is not None:
+            self.raw_data_dir = raw_data_dir
+        if matrix_dir is not None:
+            self.matrix_dir = matrix_dir
+        self.metadata_template = jinja2.Environment(
+            loader=jinja2.PackageLoader('xena_gdc_etl', 'resources')
+        ).get_template('template.phenotype.meta.json')
+
+    def __process_one_clinical_supplement(self, path):
+        """Flatten one TCGA BCR XML clinical supplement into a one-row frame.
+
+        Args:
+            path (str): XML file of GDC's TCGA clinical supplement.
+
+        Returns:
+            pandas.core.frame.DataFrame: One row indexed by the patient's
+            "bcr_patient_barcode", with one column per extracted field.
+
+        Raises:
+            IOError: If ``path`` does not end with ".xml".
+        """
+
+        def tag_of(element):
+            # Drop the "{namespace}" prefix from the element's tag name.
+            return element.tag.split('}', 1)[-1]
+
+        def text_of(element):
+            return element.text.strip() if element.text else ''
+
+        def version_key(element):
+            # Numeric order: as plain strings "10.0" would sort below "9.5".
+            version = element.attrib['version']
+            try:
+                return (1, tuple(int(n) for n in version.split('.')), version)
+            except ValueError:
+                return (0, (), version)
+
+        # Sanity check on TCGA phenotype clinical supplement file
+        ext = os.path.splitext(path)[1]
+        if ext != '.xml':
+            raise IOError(
+                'Unknown file type for TCGA clinical data: {}'.format(ext)
+            )
+        root = etree.parse(path).getroot()
+        ns = root.nsmap
+        schema = root.xpath('@xsi:schemaLocation', namespaces=ns)[0]
+        assert 'clinical' in schema.lower()
+        # "Dirty" extraction: every leaf element becomes one field.
+        leaves = root.xpath('.//*[not(*)]')
+        patient = {tag_of(leaf): text_of(leaf) for leaf in leaves}
+        # Redo 'race' as a comma-joined list of the non-empty entries.
+        patient.pop('race_list', None)
+        try:
+            races = root.find('.//clin_shared:race_list', ns)
+            patient['race'] = ','.join(
+                r.text.strip() for r in races if r.text and r.text.strip()
+            )
+        except Exception:
+            patient['race'] = ''
+        # Fields of the most recent "follow_up" override the patient dict.
+        follow_ups = root.xpath('.//*[local-name()="follow_up"]')
+        if follow_ups:
+            # ``max`` keeps the first of equally recent follow-ups.
+            for child in max(follow_ups, key=version_key):
+                patient[tag_of(child)] = text_of(child)
+        return pd.DataFrame({patient['bcr_patient_barcode']: patient}).T
+
+    def transform(self):
+        """Transform TCGA phenotype data into Xena matrix.
+
+        Raw clinical data will first be transformed individually. Then more
+        phenotype data will be retrieved and transformed from GDC API. After
+        both types of data are transformed into Xena matrices, two matrices
+        will be merged on "submitter_id". Rows without a sample ID, samples
+        whose sample type ID is "10" and samples having no data other than
+        "Biospecimen" and "Clinical" are removed before the matrix is saved
+        as a tab-separated file at ``self.matrix``.
+
+        Returns:
+            self: allow method chaining.
+        """
+
+        message = 'Make Xena matrix for raw clinical data of {}.'
+        print(message.format(self.projects))
+        total = len(self.raw_data_list)
+        clin_dfs = []
+        for count, path in enumerate(self.raw_data_list, 1):
+            print('\rProcessing {}/{} file...'.format(count, total), end='')
+            sys.stdout.flush()
+            clin_dfs.append(self.__process_one_clinical_supplement(path))
+        print('\rAll {} files have been processed. '.format(total))
+        # Stack the per-patient rows; line breaks inside values become spaces
+        # and blank values become NaN so that all-empty columns get dropped.
+        xena_matrix = (
+            pd.concat(clin_dfs, axis=0)
+            .replace(r'\r\n', ' ', regex=True)
+            .replace(r'^\s*$', np.nan, regex=True)
+            .dropna(axis=1, how='all')
+            .rename_axis('submitter_id')
+            .reset_index()
+        )
+        # Query GDC API for GDC harmonized phenotype info
+        api_clin = gdc.get_samples_clinical(self.projects)
+        # Revert hierarchy order in column names
+        api_clin = api_clin.rename(
+            columns={
+                n: '.'.join(reversed(n.split('.')))
+                for n in api_clin.columns
+            }
+        )
+        # Resolve overlap between the two matrices by dropping the listed
+        # columns; labels that are absent are skipped.
+        api_clin = api_clin.drop(self._API_DROPS, axis=1, errors='ignore')
+        xena_matrix = xena_matrix.drop(
+            self._RAW_DROPS, axis=1, errors='ignore'
+        )
+        xena_matrix = pd.merge(
+            xena_matrix, api_clin, how='outer', on='submitter_id'
+        ).replace(r'^\s*$', np.nan, regex=True)
+        # Rows the outer merge left without a sample ID cannot be looked up
+        # in the maps below, nor indexed by sample, so they are dropped.
+        xena_matrix = xena_matrix.dropna(subset=['submitter_id.samples'])
+        sample_ids = xena_matrix['submitter_id.samples'].tolist()
+        associated_data_map = gdc.map_two_fields(
+            'files',
+            'cases.samples.submitter_id',
+            'data_category',
+            input_values=sample_ids,
+        )
+        sample_type_map = gdc.map_two_fields(
+            'cases',
+            'samples.submitter_id',
+            'samples.sample_type_id',
+            input_values=sample_ids,
+        )
+
+        def keep(sample):
+            # Not sample type "10", and some data beyond the two categories.
+            return sample_type_map[sample][0] != '10' and any(
+                dcat not in ['Biospecimen', 'Clinical']
+                for dcat in associated_data_map[sample]
+            )
+
+        sample_mask = xena_matrix['submitter_id.samples'].map(keep)
+        # ``astype(bool)`` keeps the mask boolean when no row is left to test.
+        xena_matrix = (
+            xena_matrix[sample_mask.astype(bool)]
+            .set_index('submitter_id.samples')
+            .dropna(axis=1, how='all')
+        )
+        # Transformation done
+        print('\rSaving matrix to {} ...'.format(self.matrix), end='')
+        mkdir_p(self.matrix_dir)
+        xena_matrix.to_csv(self.matrix, sep='\t', encoding='utf-8')
+        print('\rXena matrix is saved at {}.'.format(self.matrix))
+        return self
+
+
 def main():
     print('A python module of Xena specific importing pipeline for GDC data.')
     start = time.time()