diff --git a/mpcontribs-portal/notebooks/contribs.materialsproject.org/dilute_solute_diffusion.ipynb b/mpcontribs-portal/notebooks/contribs.materialsproject.org/dilute_solute_diffusion.ipynb
index d1b52c173..3ce892c23 100644
--- a/mpcontribs-portal/notebooks/contribs.materialsproject.org/dilute_solute_diffusion.ipynb
+++ b/mpcontribs-portal/notebooks/contribs.materialsproject.org/dilute_solute_diffusion.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "id": "dated-confirmation",
    "metadata": {},
    "outputs": [],
@@ -12,13 +12,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "id": "forty-florida",
    "metadata": {},
    "outputs": [],
    "source": [
     "name = \"dilute_solute_diffusion\"\n",
-    "client = Client()"
+    "client = Client(project=name)"
    ]
   },
   {
@@ -30,17 +30,17 @@
    "source": [
     "import os, json, requests, sys\n",
     "from pandas import read_excel, isnull, ExcelWriter, Series\n",
-    "from mpcontribs.io.core.recdict import RecursiveDict\n",
-    "from mpcontribs.io.core.utils import clean_value, nest_dict\n",
-    "from mpcontribs.io.archieml.mpfile import MPFile\n",
-    "from pymatgen.ext.matproj import MPRester\n",
+    "from mp_api.client import MPRester\n",
+    "from pathlib import Path\n",
     "\n",
-    "z = json.load(open(\"z.json\", \"r\"))\n",
-    "mpr = MPRester()\n",
+    "data_dir = Path(os.environ.get(\"MPCONTRIBS_DATA_DIR\", \"/Users/patrick/GoogleDriveLBNL/My Drive/MaterialsProject/gitrepos/mpcontribs-data/\"))  # env var overrides the machine-specific default\n",
+    "zfile = data_dir / name / \"z.json\"\n",
+    "z = json.loads(zfile.read_text())  # read_text() opens and closes the file itself, so no handle is left open\n",
+    "mpr = MPRester()  # never commit API keys: without an argument the key is read from the MP_API_KEY environment variable\n",
     "fpath = f\"{name}.xlsx\"\n",
+    "download = False\n",
     "\n",
     "if download or not os.path.exists(fpath):\n",
-    "\n",
     "    figshare_id = 1546772\n",
     "    url = \"https://api.figshare.com/v2/articles/{}\".format(figshare_id)\n",
     "    print(\"get figshare article {}\".format(figshare_id))\n",
@@ -53,22 +53,46 @@
     "    for d in figshare[\"files\"]:\n",
     "        if \"xlsx\" in d[\"name\"]:\n",
     "            # Dict of DataFrames is returned, with keys representing sheets\n",
-    "            df_dct = read_excel(d[\"download_url\"], sheet_name=None)\n",
+    "            df_dct = read_excel(d[\"download_url\"], sheet_name=None, engine=\"openpyxl\")\n",
     "            break\n",
-    "    if df_dct is None:\n",
-    "        print(\"no excel sheet found on figshare\")\n",
-    "        return\n",
-    "\n",
-    "    print(\"save excel to disk\")\n",
-    "    writer = ExcelWriter(fpath)\n",
-    "    for sheet, df in df_dct.items():\n",
-    "        df.to_excel(writer, sheet)\n",
-    "    writer.save()\n",
-    "\n",
+    "    if df_dct is None:\n",
+    "        # stop here with a clear message; len(df_dct) at the end of the cell would otherwise fail on None\n",
+    "        raise RuntimeError(\"no excel sheet found on figshare\")\n",
+    "    print(\"save excel to disk\")\n",
+    "    with ExcelWriter(fpath) as writer:\n",
+    "        for sheet, df in df_dct.items():\n",
+    "            df.to_excel(writer, sheet_name=sheet)  # keyword form: positional sheet_name is deprecated in pandas\n",
     "else:\n",
-    "    df_dct = read_excel(fpath, sheet_name=None)\n",
+    "    df_dct = read_excel(fpath, sheet_name=None, engine=\"openpyxl\")\n",
     "\n",
-    "print(len(df_dct), \"sheets loaded.\")\n"
+    "print(len(df_dct), \"sheets loaded.\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c919df10-44ee-4d3c-b003-e026ac56bfbf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def search(formula=None, spacegroup_number=None, chemsys=None):\n",
+    "    \"\"\"Search MP's summary endpoint; hits are returned most stable first (lowest energy above hull).\"\"\"\n",
+    "    criteria = dict(formula=formula, chemsys=chemsys, spacegroup_number=spacegroup_number)\n",
+    "    docs = mpr.summary.search(**criteria, fields=[\"material_id\", \"energy_above_hull\"])\n",
+    "    # callers take the first hit as the mp-id, so sort by stability; docs without a hull energy go last\n",
+    "    return sorted(docs, key=lambda doc: (doc.energy_above_hull is None, doc.energy_above_hull or 0.0))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "827b60ac-f149-4962-a7c0-7c3b829f266e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "hosts = None  # None applies no restriction; an int N makes the host loop stop after the first N hosts\n",
+    "host_info = df_dct[\"Host Information\"].set_index(\"Host element name\").dropna().drop(columns=\"Unnamed: 0\")\n",
+    "host_info"
    ]
   },
   {
@@ -78,13 +102,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "contributions = []\n",
     "\n",
-    "print(\"looping hosts ...\")\n",
-    "host_info = df_dct[\"Host Information\"]\n",
-    "host_info.set_index(host_info.columns[0], inplace=True)\n",
-    "host_info.dropna(inplace=True)\n",
-    "\n",
-    "for idx, host in enumerate(host_info):\n",
+    "for idx, host in enumerate(host_info.columns):\n",
     "    if hosts is not None:\n",
     "        if isinstance(hosts, int) and idx + 1 > hosts:\n",
     "            break\n",
@@ -92,48 +112,46 @@
     "            continue\n",
     "\n",
     "    print(\"get mp-id for {}\".format(host))\n",
-    "    mpid = None\n",
-    "    for doc in mpr.query(\n",
-    "        criteria={\"pretty_formula\": host}, properties={\"task_id\": 1}\n",
-    "    ):\n",
-    "        if \"decomposes_to\" not in doc[\"sbxd\"][0]:\n",
-    "            mpid = doc[\"task_id\"]\n",
-    "            break\n",
-    "    if mpid is None:\n",
+    "    results = search(formula=host)\n",
+    "    if not results:\n",
     "        print(\"mp-id for {} not found\".format(host))\n",
     "        continue\n",
     "\n",
+    "    mpid = str(results[0].material_id)\n",
+    "    contrib = {\"identifier\": mpid}\n",
     "    print(\"add host info for {}\".format(mpid))\n",
-    "    hdata = host_info[host].to_dict(into=RecursiveDict)\n",
+    "    hdata = host_info[host].to_dict()\n",
     "    for k in list(hdata.keys()):\n",
     "        v = hdata.pop(k)\n",
     "        ks = k.split()\n",
     "        if ks[0] not in hdata:\n",
-    "            hdata[ks[0]] = RecursiveDict()\n",
+    "            hdata[ks[0]] = {}\n",
     "        unit = ks[-1][1:-1] if ks[-1].startswith(\"[\") else \"\"\n",
     "        subkey = \"_\".join(ks[1:-1] if unit else ks[1:]).split(\",\")[0]\n",
     "        if subkey == \"lattice_constant\":\n",
     "            unit = \"Å\"\n",
     "        try:\n",
-    "            hdata[ks[0]][subkey] = clean_value(v, unit.replace(\"angstrom\", \"Å\"))\n",
+    "            unit = unit.replace(\"angstrom\", \"Å\")\n",
+    "            hdata[ks[0]][subkey] = f\"{v} {unit}\" if unit else v\n",
     "        except ValueError:\n",
     "            hdata[ks[0]][subkey] = v\n",
-    "    hdata[\"formula\"] = host\n",
-    "    df = df_dct[\"{}-X\".format(host)]\n",
-    "    rows = list(isnull(df).any(1).nonzero()[0])\n",
+    "    contrib[\"formula\"] = host\n",
+    "    df = df_dct[\"{}-X\".format(host)].drop(\"Unnamed: 0\", axis=1)\n",
+    "    rows = list(isnull(df).any(axis=1).to_numpy().nonzero()[0])\n",
     "    if rows:\n",
     "        cells = df.iloc[rows].dropna(how=\"all\").dropna(axis=1)[df.columns[0]]\n",
     "        note = cells.iloc[0].replace(\"following\", cells.iloc[1])[:-1]\n",
     "        hdata[\"note\"] = note\n",
-    "        df.drop(rows, inplace=True)\n",
-    "    mpfile.add_hierarchical_data(nest_dict(hdata, [\"data\"]), identifier=mpid)\n",
+    "        df = df.drop(rows)\n",
     "\n",
+    "    contrib[\"data\"] = hdata\n",
+    "    \n",
     "    print(\"add table for D₀/Q data for {}\".format(mpid))\n",
-    "    df.set_index(df[\"Solute element number\"], inplace=True)\n",
-    "    df.drop(\"Solute element number\", axis=1, inplace=True)\n",
+    "    df = df.set_index(df[\"Solute element number\"])\n",
+    "    df = df.drop(\"Solute element number\", axis=1)\n",
     "    df.columns = df.iloc[0]\n",
     "    df.index.name = \"index\"\n",
-    "    df.drop(\"Solute element name\", inplace=True)\n",
+    "    df = df.drop(\"Solute element name\")\n",
     "    df = df.T.reset_index()\n",
     "    if str(host) == \"Fe\":\n",
     "        df_D0_Q = df[\n",
@@ -153,15 +171,23 @@
     "        ]\n",
     "    else:\n",
     "        df_D0_Q = df[[\"Solute element name\", \"Solute D0 [cm^2/s]\", \"Solute Q [eV]\"]]\n",
+    "\n",
     "    df_D0_Q.columns = [\"Solute\", \"D₀ [cm²/s]\", \"Q [eV]\"]\n",
     "    anums = [z[el] for el in df_D0_Q[\"Solute\"]]\n",
     "    df_D0_Q.insert(0, \"Z\", Series(anums, index=df_D0_Q.index))\n",
-    "    df_D0_Q.sort_values(\"Z\", inplace=True)\n",
-    "    df_D0_Q.reset_index(drop=True, inplace=True)\n",
-    "    mpfile.add_data_table(mpid, df_D0_Q, \"D₀_Q\")\n",
+    "    df_D0_Q = df_D0_Q.sort_values(\"Z\")\n",
+    "    df_D0_Q = df_D0_Q.reset_index(drop=True)\n",
+    "    df_D0_Q.attrs = {\n",
+    "        \"name\": \"D0_Q\",\n",
+    "        \"title\": \"D₀/Q by Solute\",\n",
+    "        \"labels\": {\n",
+    "            \"value\": \"D₀/Q\",\n",
+    "            #\"variable\": \"method\"\n",
+    "        }\n",
+    "    }\n",
+    "    contrib[\"tables\"] = [df_D0_Q]\n",
     "\n",
     "    if hdata[\"Host\"][\"crystal_structure\"] == \"BCC\":\n",
-    "\n",
     "        print(\"add table for hop activation barriers for {} (BCC)\".format(mpid))\n",
     "        columns_E = (\n",
     "            [\"Hop activation barrier, E_{} [eV]\".format(i) for i in range(2, 5)]\n",
@@ -177,7 +203,11 @@
     "            + [\"E``{} [eV]\".format(i) for i in [\"₃\", \"₄\"]]\n",
     "            + [\"E{} [eV]\".format(i) for i in [\"₅\", \"₆\"]]\n",
     "        )\n",
-    "        mpfile.add_data_table(mpid, df_E, \"hop_activation_barriers\")\n",
+    "        df_E.attrs = {\n",
+    "            \"name\": \"hop_activation_barriers\",\n",
+    "            \"title\": \"Hop Activation Barriers\",\n",
+    "        }\n",
+    "        contrib[\"tables\"].append(df_E)\n",
     "\n",
     "        print(\"add table for hop attempt frequencies for {} (BCC)\".format(mpid))\n",
     "        columns_v = (\n",
@@ -194,7 +224,11 @@
     "            + [\"v``{} [THz]\".format(i) for i in [\"₃\", \"₄\"]]\n",
     "            + [\"v{} [THz]\".format(i) for i in [\"₅\", \"₆\"]]\n",
     "        )\n",
-    "        mpfile.add_data_table(mpid, df_v, \"hop_attempt_frequencies\")\n",
+    "        df_v.attrs = {\n",
+    "            \"name\": \"hop_attempt_frequencies\",\n",
+    "            \"title\": \"Hop Attempt Frequencies\",\n",
+    "        }\n",
+    "        contrib[\"tables\"].append(df_v)\n",
     "\n",
     "    elif hdata[\"Host\"][\"crystal_structure\"] == \"FCC\":\n",
     "\n",
@@ -206,7 +240,11 @@
     "        df_E.columns = [\"Solute\"] + [\n",
     "            \"E{} [eV]\".format(i) for i in [\"₀\", \"₁\", \"₂\", \"₃\", \"₄\"]\n",
     "        ]\n",
-    "        mpfile.add_data_table(mpid, df_E, \"hop_activation_barriers\")\n",
+    "        df_E.attrs = {\n",
+    "            \"name\": \"hop_activation_barriers\",\n",
+    "            \"title\": \"Hop Activation Barriers\",\n",
+    "        }\n",
+    "        contrib[\"tables\"].append(df_E)\n",
     "\n",
     "        print(\"add table for hop attempt frequencies for {} (FCC)\".format(mpid))\n",
     "        columns_v = [\n",
@@ -216,7 +254,11 @@
     "        df_v.columns = [\"Solute\"] + [\n",
     "            \"v{} [THz]\".format(i) for i in [\"₀\", \"₁\", \"₂\", \"₃\", \"₄\"]\n",
     "        ]\n",
-    "        mpfile.add_data_table(mpid, df_v, \"hop_attempt_frequencies\")\n",
+    "        df_v.attrs = {\n",
+    "            \"name\": \"hop_attempt_frequencies\",\n",
+    "            \"title\": \"Hop Attempt Frequencies\",\n",
+    "        }\n",
+    "        contrib[\"tables\"].append(df_v)\n",
     "\n",
     "    elif hdata[\"Host\"][\"crystal_structure\"] == \"HCP\":\n",
     "\n",
@@ -242,7 +284,11 @@
     "            \"Eꪱ [eV]\",\n",
     "            \"E`ꪱ [eV]\",\n",
     "        ]\n",
-    "        mpfile.add_data_table(mpid, df_E, \"hop_activation_barriers\")\n",
+    "        df_E.attrs = {\n",
+    "            \"name\": \"hop_activation_barriers\",\n",
+    "            \"title\": \"Hop Activation Barriers\",\n",
+    "        }\n",
+    "        contrib[\"tables\"].append(df_E)\n",
     "\n",
     "        print(\"add table for hop attempt frequencies for {} (HCP)\".format(mpid))\n",
     "        columns_v = [\"Hop attempt frequency, v_a [THz]\"] + [\n",
@@ -250,56 +296,96 @@
     "        ]\n",
     "        df_v = df[[\"Solute element name\"] + columns_v]\n",
     "        df_v.columns = [\"Solute\"] + [\"vₐ [THz]\"] + [\"vₓ [THz]\"]\n",
-    "        mpfile.add_data_table(mpid, df_v, \"hop_attempt_frequencies\")\n",
+    "        df_v.attrs = {\n",
+    "            \"name\": \"hop_attempt_frequencies\",\n",
+    "            \"title\": \"Hop Attempt Frequencies\",\n",
+    "        }\n",
+    "        contrib[\"tables\"].append(df_v)\n",
     "\n",
-    "print(\"DONE\")\n",
+    "    contributions.append(contrib)\n",
     "\n",
+    "len(contributions)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d978f45a-f160-4bd4-89b9-9d3eef7e6449",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from flatten_dict import flatten, unflatten\n",
     "\n",
-    "mpfile = MPFile()\n",
-    "mpfile.max_contribs = 15\n",
-    "run(mpfile)\n",
-    "print(mpfile)\n",
-    "\n",
-    "filename = f\"{project}.txt\"\n",
-    "mpfile.write_file(filename=filename)\n",
-    "mpfile = MPFile.from_file(filename)\n",
-    "print(len(mpfile.ids))\n",
-    "\n",
-    "table_names = [\"D₀_Q\", \"hop_activation_barriers\", \"hop_attempt_frequencies\"]\n",
-    "\n",
-    "for idx, (identifier, content) in enumerate(mpfile.document.items()):\n",
-    "    # doc = {'identifier': identifier, 'project': project, 'content': {}}\n",
-    "    # doc['content']['data'] = content['data']\n",
-    "    # doc['collaborators'] = [{'name': 'Patrick Huck', 'email': 'phuck@lbl.gov'}]\n",
-    "    # r = db.contributions.insert_one(doc)\n",
-    "    # cid = r.inserted_id\n",
-    "    # print(idx, ':', cid)\n",
+    "columns_map = {\n",
+    "    \"Host.crystal_structure\": {\"name\": \"host.symmetry\", \"description\": \"host crystal structure\"},\n",
+    "    \"Host.melting_temperature\": {\"name\": \"host.temperature|melt\", \"unit\": \"K\", \"description\": \"host melting temperature\"},\n",
+    "    \"Host.vacancy_formation_energy\": {\"name\": \"host.energy|formation\", \"unit\": \"eV\", \"description\": \"host vacancy formation energy\"},\n",
+    "    \"Host.lattice_constant\": {\"name\": \"host.lattice\", \"unit\": \"Å\", \"description\": \"host lattice constant\"},\n",
+    "    \"Host.self-diffusion_correction_shift\": {\"name\": \"host.shift\", \"unit\": \"eV\", \"description\": \"host self diffusion correction shift\"},\n",
+    "    \"note\": {\"name\": \"excluded\", \"description\": \"solutes were calculated but either did not converge or relaxed into the neighboring vacancy, making it ineligible for the analytical multi-frequency formalism\"},\n",
+    "}\n",
+    "columns = {col[\"name\"]: col.get(\"unit\") for col in columns_map.values()}\n",
+    "clean_contributions = []\n",
     "\n",
-    "    # tids = []\n",
-    "    # for name in table_names:\n",
-    "    #    table = mpfile.document[identifier][name]\n",
-    "    #    table.pop('@module')\n",
-    "    #    table.pop('@class')\n",
-    "    #    table['identifier'] = identifier\n",
-    "    #    table['project'] = project\n",
-    "    #    table['name'] = name\n",
-    "    #    table['cid'] = cid\n",
-    "    #    r = db.tables.insert_one(table)\n",
-    "    #    tids.append(r.inserted_id)\n",
+    "for contrib in contributions:\n",
+    "    clean_contrib = {key: contrib[key] for key in (\"identifier\", \"formula\", \"tables\")}  # carried over unchanged\n",
+    "    data = {}\n",
+    "    for k, v in flatten(contrib[\"data\"], reducer=\"dot\").items():\n",
+    "        col = columns_map[k]  # target column name and description for this flattened key\n",
+    "        v = v.replace(\"The\", \"\").replace(col[\"description\"], \"\").replace(\"solutes were calculated but either did not converge or relaxed into the neighboring vacancy, making the solute ineligible for the analytical multi-frequency formalism\", \"\")\n",
+    "        data[col[\"name\"]] = v.strip()  # what remains after removing the boilerplate text\n",
     "\n",
-    "    # print(tids)\n",
-    "    # query = {'identifier': identifier, 'project': project}\n",
-    "    # r = db.contributions.update_one(query, {'$set': {'content.tables': tids}})\n",
+    "    clean_contrib[\"data\"] = unflatten(data, splitter=\"dot\")  # dotted column names become nested dicts again\n",
+    "    clean_contributions.append(clean_contrib)\n",
     "\n",
-    "    name = table_names[0]\n",
-    "    query = {\"identifier\": identifier, \"project\": project, \"name\": name}\n",
-    "    print(query)\n",
-    "    table = mpfile.document[identifier][name]\n",
-    "    r = db.tables.update_one(\n",
-    "        query, {\"$set\": {\"columns\": table[\"columns\"], \"data\": table[\"data\"]}}\n",
-    "    )\n",
-    "    print(r.matched_count, r.modified_count)\n"
+    "len(clean_contributions)"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "017ad9b3-85ea-4571-9d2f-ada20e66e6b1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# description = client.get_project(fields=[\"description\"]).get(\"description\")\n",
+    "# description += \" Diffusion values for Fe-X are given for the α-BCC phase, both paramagnetic and ferromagnetic. The paramagnetic D₀ and Q are given here, the full diffusivity can be obtained by: D|BCC(T) = D₀|para * exp[-Q|para*(1+αs²)/(kT)] where α=0.156 and s is the temperature dependent spontaneous magnetization of Fe relative to T=0K.\"\n",
+    "# description += \" NSF award No. 1148011, version 10.\"\n",
+    "# client.update_project({\"description\": description})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c2e726cd-e11d-409f-acaf-d82cff3e9c52",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# other = unflatten({col[\"name\"]: col[\"description\"] for col in columns_map.values()}, splitter=\"dot\")\n",
+    "# #client.update_project({\"other\": {\"funding\": None, \"version\": None, \"abbreviations\": None, \"FeX\": None}})\n",
+    "# client.update_project({\"other\": other})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a55fde6b-1566-4d2a-9be1-ea4fc0f9e7f4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "client.delete_contributions()  # NOTE(review): no query is passed - presumably this removes every existing contribution of the client's project; confirm before running\n",
+    "client.init_columns(columns)  # columns maps each renamed column to its unit (None where columns_map gives no unit)\n",
+    "client.submit_contributions(clean_contributions)\n",
+    "client.init_columns(columns)  # NOTE(review): same call as before the submit - presumably repeated on purpose; confirm"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6bce3aa4-aabe-46d3-8ae4-2e1119077812",
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {