diff --git a/Project.toml b/Project.toml index 481ba63ee..d3dfb918f 100755 --- a/Project.toml +++ b/Project.toml @@ -21,11 +21,13 @@ MultivariateStats = "6f286f6a-111f-5878-ab1e-185364afe411" PackageExtensionCompat = "65ce6f38-6b18-4e1d-a461-8949797d7930" Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a" ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" +PythonCall = "6099a3de-0909-46bc-b1f4-468b9a2dfc0d" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" +Transformers = "21ca0261-441d-5938-ace7-c90938fde4d4" UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd" @@ -40,10 +42,9 @@ LaplaceReduxExt = "LaplaceRedux" MPIExt = "MPI" [compat] +CUDA = "3, 4, 5" CategoricalArrays = "0.10" ChainRulesCore = "1.15" -CUDA = "3, 4, 5" -cuDNN = "1" DataFrames = "1" DecisionTree = "0.12.3" Distributions = "0.25.97" @@ -57,6 +58,7 @@ MLJBase = "0.21, 1" MLJDecisionTreeInterface = "0.4.0" MLUtils = "0.2, 0.3, 0.4" MPI = "0.20" +MPIPreferences = "0.2" MultivariateStats = "0.9, 0.10" PackageExtensionCompat = "1" Parameters = "0.12" @@ -66,9 +68,10 @@ Serialization = "1.6, 1.7, 1.8, 1.9, 1.10" Statistics = "1" StatsBase = "0.33, 0.34" Tables = "1" +Transformers = "0.2.8" UUIDs = "1.6, 1.7, 1.8, 1.9, 1.10" +cuDNN = "1" julia = "1.6, 1.7, 1.8, 1.9, 1.10" -MPIPreferences = "0.2" [extras] EvoTrees = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5" diff --git a/dev/notebooks/.gitignore b/dev/notebooks/.gitignore new file mode 100644 index 000000000..95afe1b38 --- /dev/null +++ b/dev/notebooks/.gitignore @@ -0,0 +1,3 @@ +.ipynb_checkpoints +.CondaPkg +model \ No newline at end of file diff --git a/dev/notebooks/CondaPkg.toml b/dev/notebooks/CondaPkg.toml new file mode 100644 index 000000000..ad30c47dc --- /dev/null +++ b/dev/notebooks/CondaPkg.toml @@ -0,0 +1,9 @@ +channels = ["anaconda", "pytorch", "nvidia", "conda-forge"] + +[deps] +pytorch-cuda = "12.1" +cudnn = "" +pytorch = "" +transformers-interpret = "" +python = ">=3.8,<4" +transformers = "4.15.0" diff --git a/dev/notebooks/Dataset_masking.ipynb b/dev/notebooks/Dataset_masking.ipynb new file mode 100644 index 000000000..f8bd79b41 --- /dev/null +++ b/dev/notebooks/Dataset_masking.ipynb @@ -0,0 +1,97 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 6, + "id": "14e30426-1457-4c05-ba3f-272e9241b139", + "metadata": {}, + "outputs": [], + "source": [ + "using Transformers.TextEncoders\n", + "using Transformers.HuggingFace" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "30228c72-e074-43ab-9b28-129c5811963e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "BertTextEncoder(\n", + "├─ TextTokenizer(MatchTokenization(WordPieceTokenization(bert_uncased_tokenizer, WordPiece(vocab_size = 30522, unk = [UNK], max_char = 100)), 5 patterns)),\n", + "├─ vocab = Vocab{String, SizedArray}(size = 30522, unk = [UNK], unki = 101),\n", + "├─ startsym = [CLS],\n", + "├─ endsym = [SEP],\n", + "├─ padsym = [PAD],\n", + "├─ trunc = 512,\n", + "└─ process = Pipelines:\n", + " ╰─ target[token] := TextEncodeBase.nestedcall(string_getvalue, source)\n", + " ╰─ target[token] := Transformers.TextEncoders.grouping_sentence(target.token)\n", + " ╰─ target[(token, segment)] := SequenceTemplate{String}([CLS]: Input[1]: [SEP]: (Input[2]: [SEP]:)...)(target.token)\n", + " ╰─ target[attention_mask] := 
(NeuralAttentionlib.LengthMask ∘ Transformers.TextEncoders.getlengths(512))(target.token)\n", + " ╰─ target[token] := TextEncodeBase.trunc_and_pad(512, [PAD], tail, tail)(target.token)\n", + " ╰─ target[token] := TextEncodeBase.nested2batch(target.token)\n", + " ╰─ target[segment] := TextEncodeBase.trunc_and_pad(512, 1, tail, tail)(target.segment)\n", + " ╰─ target[segment] := TextEncodeBase.nested2batch(target.segment)\n", + " ╰─ target := (target.token, target.segment, target.attention_mask)\n", + ")" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bert_enc = hgf\"bert-base-uncased:tokenizer\"" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "96d7bb3c-85a8-4fdd-bd72-c98ee6904758", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(token = Bool[0 0 … 0 0; 0 0 … 0 0; … ; 0 0 … 0 0; 0 0 … 0 0;;; 0 0 … 1 1; 0 0 … 0 0; … ; 0 0 … 0 0; 0 0 … 0 0], segment = [1 1; 1 1; … ; 1 1; 1 1], attention_mask = NeuralAttentionlib.LengthMask{1, Vector{Int32}}(Int32[11, 9]))" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "encode(bert_enc, [\"hello [MASK] world [MASK] [MASK] and my [MASK]!\", \"bonjour mes [MASK].\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "72aefb08-9c24-4c99-afe6-f3cafe743c22", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Julia 1.10.2", + "language": "julia", + "name": "julia-1.10" + }, + "language_info": { + "file_extension": ".jl", + "mimetype": "application/julia", + "name": "julia", + "version": "1.10.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/dev/notebooks/Load_CMLM.ipynb b/dev/notebooks/Load_CMLM.ipynb new file mode 100644 index 000000000..6949dacdb --- /dev/null +++ b/dev/notebooks/Load_CMLM.ipynb @@ -0,0 +1,172 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "id": "f5b3440e-acd9-4c06-ad66-acd36c663ade", + "metadata": {}, + "outputs": [], + "source": [ + "using Transformers" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "96c619a7-9c69-4952-9b7f-a02509f7b896", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "load_model (generic function with 1 method)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "function load_model(; load_head = true, kwrgs...)\n", + " model_name = \"karoldobiczek/relitc-FOMC-CMLM\"\n", + " tkr = Transformers.load_tokenizer(model_name)\n", + " cfg = Transformers.HuggingFace.HGFConfig(Transformers.load_config(model_name); kwrgs...)\n", + " mod = Transformers.load_model(model_name, \"ForMaskedLM\"; config = cfg)\n", + "\n", + " return tkr, mod, cfg\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "a1fa980d-7508-469d-8f0c-db26bd82cc40", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(BertTextEncoder(\n", + "├─ TextTokenizer(MatchTokenization(WordPieceTokenization(bert_uncased_tokenizer, WordPiece(vocab_size = 30522, unk = [UNK], max_char = 100)), 5 patterns)),\n", + "├─ vocab = Vocab{String, SizedArray}(size = 30522, unk = [UNK], unki = 101),\n", + "├─ startsym = [CLS],\n", + "├─ endsym = [SEP],\n", + "├─ padsym = [PAD],\n", + "├─ trunc = 512,\n", + "└─ process = Pipelines:\n", + " ╰─ target[token] := TextEncodeBase.nestedcall(string_getvalue, source)\n", + " ╰─ target[token] := 
Transformers.TextEncoders.grouping_sentence(target.token)\n", + " ╰─ target[(token, segment)] := SequenceTemplate{String}([CLS]: Input[1]: [SEP]: (Input[2]: [SEP]:)...)(target.token)\n", + " ╰─ target[attention_mask] := (NeuralAttentionlib.LengthMask ∘ Transformers.TextEncoders.getlengths(512))(target.token)\n", + " ╰─ target[token] := TextEncodeBase.trunc_and_pad(512, [PAD], head, tail)(target.token)\n", + " ╰─ target[token] := TextEncodeBase.nested2batch(target.token)\n", + " ╰─ target[segment] := TextEncodeBase.trunc_and_pad(512, 1, head, tail)(target.segment)\n", + " ╰─ target[segment] := TextEncodeBase.nested2batch(target.segment)\n", + " ╰─ target := (target.token, target.segment, target.attention_mask)\n", + "), HGFBertForMaskedLM(HGFBertModel(Chain(CompositeEmbedding(token = Embed(768, 30522), position = ApplyEmbed(.+, FixedLenPositionEmbed(768, 512)), segment = ApplyEmbed(.+, Embed(768, 2), Transformers.HuggingFace.bert_ones_like)), DropoutLayer(LayerNorm(768, ϵ = 1.0e-12))), Transformer<12>(PostNormTransformerBlock(DropoutLayer(SelfAttention(MultiheadQKVAttenOp(head = 12, p = nothing), Fork<3>(Dense(W = (768, 768), b = true)), Dense(W = (768, 768), b = true))), LayerNorm(768, ϵ = 1.0e-12), DropoutLayer(Chain(Dense(σ = NNlib.gelu, W = (768, 3072), b = true), Dense(W = (3072, 768), b = true))), LayerNorm(768, ϵ = 1.0e-12))), nothing), Branch{(:logit,) = (:hidden_state,)}(Chain(Dense(σ = NNlib.gelu, W = (768, 768), b = true), LayerNorm(768, ϵ = 1.0e-12), EmbedDecoder(Embed(768, 30522), bias = true)))), Transformers.HuggingFace.HGFConfig{:bert, JSON3.Object{Vector{UInt8}, Vector{UInt64}}, Nothing}(:_name_or_path => \"bert-base-uncased\", :architectures => [\"BertForMaskedLM\"], :attention_probs_dropout_prob => 0.1, :classifier_dropout => nothing, :gradient_checkpointing => false, :hidden_act => \"gelu\", :hidden_dropout_prob => 0.1, :hidden_size => 768, :initializer_range => 0.02, :intermediate_size => 3072…))" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tkr, model = load_model()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "0f83d8a1-bb74-445c-ba2f-908fe19f5149", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(token = Bool[0 0 … 0 0; 0 0 … 0 0; … ; 0 0 … 0 0; 0 0 … 0 0], segment = [1, 1, 1, 1, 1, 1], attention_mask = NeuralAttentionlib.LengthMask{1, Vector{Int32}}(Int32[6]))" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "input = TextEncoders.encode(tkr, \"[SEP] hello world!\")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "3caedfa7-6d74-468a-8d58-42cc0ace447c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(hidden_state = Float32[-0.0068912334 1.0873084 … -0.60200036 1.0715828; 0.12056827 0.10844539 … -0.6849348 0.116397835; … ; -0.026128935 -0.5781372 … -0.010042516 -0.5654973; 0.13880502 -0.25988412 … -0.09458274 -0.27784675;;;], attention_mask = NeuralAttentionlib.LengthMask{1, Vector{Int32}}(Int32[6]), logit = Float32[-6.8121076 -13.4576 … -11.932923 -13.186207; -6.7392855 -13.34565 … -12.13822 -13.120222; … ; -6.0473905 -10.81515 … -10.769718 -10.597319; -4.2169976 -12.677718 … -3.9266496 -12.219977;;;])" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "out = model(input)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "af5e931b-d4cd-4d65-a3e8-79b43bc60f58", + 
"metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "6×1 Matrix{String}:\n", + " \".\"\n", + " \".\"\n", + " \"hello\"\n", + " \"world\"\n", + " \"!\"\n", + " \".\"" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "TextEncoders.decode(tkr, out.logit)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "666d9c78-b952-43f0-b1c0-764520bb2acd", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Julia 1.10.2", + "language": "julia", + "name": "julia-1.10" + }, + "language_info": { + "file_extension": ".jl", + "mimetype": "application/julia", + "name": "julia", + "version": "1.10.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/dev/notebooks/Project.toml b/dev/notebooks/Project.toml new file mode 100644 index 000000000..00b473db3 --- /dev/null +++ b/dev/notebooks/Project.toml @@ -0,0 +1,16 @@ +[deps] +CUDNN_jll = "62b44479-cb7b-5706-934f-f13b2eb2e645" +CondaPkg = "992eb4ea-22a4-4c89-a5bb-47a3300528ab" +CounterfactualExplanations = "2f13d31b-18db-44c1-bc43-ebaf2cff0be0" +Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" +IJulia = "7073ff75-c697-5162-941a-fcdaad2a7d2a" +Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +PreferenceTools = "ba661fbb-e901-4445-b070-854aec6bfbc5" +PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" +PythonCall = "6099a3de-0909-46bc-b1f4-468b9a2dfc0d" +StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" +TaijaData = "9d524318-b4e6-4a65-86d2-b2b72d07866c" +TaijaPlotting = "bd7198b4-c7d6-400c-9bab-9a24614b0240" +Transformers = "21ca0261-441d-5938-ace7-c90938fde4d4" +TrillionDollarWords = "d66529d5-f4f4-49d9-a69b-da67f5535f0a" +cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd" diff --git a/dev/notebooks/RELITC.ipynb b/dev/notebooks/RELITC.ipynb new file mode 100644 index 000000000..a11b07150 --- /dev/null +++ b/dev/notebooks/RELITC.ipynb @@ -0,0 +1,1370 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "e7aa29e0-5b18-477d-bee4-9d9b42aedc7e", + "metadata": {}, + "source": [ + "## RELITC\n", + "[Relevance-based Infilling for Natural Language Counterfactuals](https://dl.acm.org/doi/10.1145/3583780.3615029) (RELITC) is a Language Model counterfactual explanation method. It uses LM feature attributions to identify tokens in the original text which contribute the most to the LM classification. Once idendified, the tokens are masked and a Conditional Masked LM (CMLM), like BERT is used to fill the masks, creating a new text that should be classified to the target class. RELITC additionally tries to quantify the uncertainty of the CMLM to guide the infilling process. The only parameter of the method, $K$, percentage of masked tokens is established through beam search." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a521610f-12dc-4e6e-80e7-b51328794d51", + "metadata": {}, + "outputs": [], + "source": [ + "# using Pkg\n", + "# Pkg.add(\"CUDNN_jll\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "0de19327-1494-4f9c-a799-d5551fac5826", + "metadata": {}, + "outputs": [], + "source": [ + "using DataFrames\n", + "using Transformers\n", + "using Transformers.TextEncoders\n", + "using Transformers.HuggingFace\n", + "using TrillionDollarWords\n", + "using StatsBase\n", + "using Flux" + ] + }, + { + "cell_type": "markdown", + "id": "b4e29646-4837-4121-a9fe-6426a352811e", + "metadata": {}, + "source": [ + "### Load data\n", + "The data used here is from the [Trillion Dollar Words](https://aclanthology.org/2023.acl-long.368/) dataset using the [TrillionDollarWords.jl](https://github.com/pat-alt/TrillionDollarWords.jl) package." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "a9c6c853-d63f-4f48-b188-0d12d9a11be0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
10×7 DataFrame
Row | sentence | year | label | seed | sentence_splitting | event_type | split
    | String | Int64 | String7 | Int64 | Bool | String31 | String7
1 | remained well below their levels at the beginning of the year, and that weaker demand and earlier declines in oil prices had been holding down consumer price inflation. | 1996 | hawkish | 5768 | true | meeting minutes | test
2 | A few participants also noted that uncertainty about the extent of resource slack in the economy was considerable and that it was quite possible that the economy could soon be operating close to potential, particularly if labor force participation rates did not turn up much while employment continued to register gains. | 1996 | neutral | 5768 | true | meeting minutes | test
3 | inflation was projected to pick up gradually in association with a partial reversal of the decline in energy prices this year. | 1996 | neutral | 5768 | true | meeting minutes | test
4 | They noted that the realization of such a development could make it harder for the Committee to achieve 2 percent inflation over the longer run. | 1996 | neutral | 5768 | true | meeting minutes | test
5 | In the view of one member, however, aggregate final demand was so strong that, with economic activity and the associated demand for labor having expanded at an unsustainable pace for some time, one could be reasonably confident that inflation would most likely pick up in the absence of policy action. | 1996 | hawkish | 5768 | true | meeting minutes | test
6 | In the circumstances, most members endorsed a proposal to delete as no longer necessary the previous summary statement relating to the risks to growth and inflation taken together. | 1996 | neutral | 5768 | true | meeting minutes | test
7 | In the staff forecast prepared for this meeting, the economy was seen as likely to expand at a moderate pace, supported by accommodative monetary policy and financial conditions. | 1996 | dovish | 5768 | true | meeting minutes | test
8 | Housing starts and the demand for new homes had declined further, house prices in many parts of the country were falling faster than they had towards the end of 2007, and inventories of unsold homes remained quite elevated. | 1996 | dovish | 5768 | true | meeting minutes | test
9 | Pressures on resources would rise as the anticipated upturn and possible above-trend growth brought the economy closer to full capacity utilization. | 1996 | hawkish | 5768 | true | meeting minutes | test
10 | Price inflation had picked up a little but, abstracting from energy, had remained relatively subdued. | 1996 | neutral | 5768 | true | meeting minutes | test
" + ], + "text/latex": [ + "\\begin{tabular}{r|cc}\n", + "\t& sentence & \\\\\n", + "\t\\hline\n", + "\t& String & \\\\\n", + "\t\\hline\n", + "\t1 & remained well below their levels at the beginning of the year, and that weaker demand and earlier declines in oil prices had been holding down consumer price inflation. & $\\dots$ \\\\\n", + "\t2 & A few participants also noted that uncertainty about the extent of resource slack in the economy was considerable and that it was quite possible that the economy could soon be operating close to potential, particularly if labor force participation rates did not turn up much while employment continued to register gains. & $\\dots$ \\\\\n", + "\t3 & inflation was projected to pick up gradually in association with a partial reversal of the decline in energy prices this year. & $\\dots$ \\\\\n", + "\t4 & They noted that the realization of such a development could make it harder for the Committee to achieve 2 percent inflation over the longer run. & $\\dots$ \\\\\n", + "\t5 & In the view of one member, however, aggregate final demand was so strong that, with economic activity and the associated demand for labor having expanded at an unsustainable pace for some time, one could be reasonably confident that inflation would most likely pick up in the absence of policy action. & $\\dots$ \\\\\n", + "\t6 & In the circumstances, most members endorsed a proposal to delete as no longer necessary the previous summary statement relating to the risks to growth and inflation taken together. & $\\dots$ \\\\\n", + "\t7 & In the staff forecast prepared for this meeting, the economy was seen as likely to expand at a moderate pace, supported by accommodative monetary policy and financial conditions. & $\\dots$ \\\\\n", + "\t8 & Housing starts and the demand for new homes had declined further, house prices in many parts of the country were falling faster than they had towards the end of 2007, and inventories of unsold homes remained quite elevated. & $\\dots$ \\\\\n", + "\t9 & Pressures on resources would rise as the anticipated upturn and possible above-trend growth brought the economy closer to full capacity utilization. & $\\dots$ \\\\\n", + "\t10 & Price inflation had picked up a little but, abstracting from energy, had remained relatively subdued. 
& $\\dots$ \\\\\n", + "\\end{tabular}\n" + ], + "text/plain": [ + "\u001b[1m10×7 DataFrame\u001b[0m\n", + "\u001b[1m Row \u001b[0m│\u001b[1m sentence \u001b[0m\u001b[1m year \u001b[0m\u001b[1m label \u001b[0m\u001b[1m seed \u001b[0m\u001b[1m sentence_spli\u001b[0m ⋯\n", + " │\u001b[90m String \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m String7 \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Bool \u001b[0m ⋯\n", + "─────┼──────────────────────────────────────────────────────────────────────────\n", + " 1 │ remained well below their levels… 1996 hawkish 5768 ⋯\n", + " 2 │ A few participants also noted th… 1996 neutral 5768\n", + " 3 │ inflation was projected to pick … 1996 neutral 5768\n", + " 4 │ They noted that the realization … 1996 neutral 5768\n", + " 5 │ In the view of one member, howev… 1996 hawkish 5768 ⋯\n", + " 6 │ In the circumstances, most membe… 1996 neutral 5768\n", + " 7 │ In the staff forecast prepared f… 1996 dovish 5768\n", + " 8 │ Housing starts and the demand fo… 1996 dovish 5768\n", + " 9 │ Pressures on resources would ris… 1996 hawkish 5768 ⋯\n", + " 10 │ Price inflation had picked up a … 1996 neutral 5768\n", + "\u001b[36m 3 columns omitted\u001b[0m" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\u001b[90m Pkg.update() complete \u001b[39m\n" + ] + } + ], + "source": [ + "n = 10\n", + "data = load_training_sentences()\n", + "texts = filter(:split => n -> n == \"test\", data)[1:n, :]" + ] + }, + { + "cell_type": "markdown", + "id": "c716f911-920d-468b-92e8-8ca639367303", + "metadata": {}, + "source": [ + "### Get attributions\n", + "The feature attributions are computed using the transformers-interpret Python library and loaded to Julia using PythonCall.\n", + "\n", + "The `scorer` outputs a per-token score of the degree of contribution to a specified class." 
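The Python side of this is declared in the `CondaPkg.toml` added above; a minimal sketch of how those pieces are wired together (standard CondaPkg/PythonCall calls, not code from this notebook):

```julia
# CondaPkg resolves the conda environment declared in CondaPkg.toml
# (pytorch, transformers, transformers-interpret); PythonCall then imports
# the Python packages into the Julia session.
using CondaPkg, PythonCall
CondaPkg.resolve()                      # install / refresh the conda environment
transformers_interpret = pyimport("transformers_interpret")
transformers = pyimport("transformers")
```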
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e00a7c85-2a74-41bf-ad75-899f3317dac9", + "metadata": {}, + "outputs": [], + "source": [ + "# Install necessary dependencies\n", + "# using CondaPkg\n", + "# CondaPkg.add(\"pytorch\")\n", + "# CondaPkg.add(\"transformers\"; version=\"4.15.0\")\n", + "# CondaPkg.add(\"transformers-interpret\")\n", + "# CondaPkg.add(\"cuDNN\")" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "ff425216-c8a3-49d7-ba21-b89da26e7b4d", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "SYSTEM: caught exception of type :MethodError while trying to print a failed Task notice; giving up\n" + ] + }, + { + "data": { + "text/plain": [ + "Python: " + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "using PythonCall\n", + "\n", + "transformers_interpret = PythonCall.pyimport(\"transformers_interpret\")\n", + "transformers = PythonCall.pyimport(\"transformers\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2a605169-6109-41fb-973b-7d5a044e71f1", + "metadata": {}, + "outputs": [], + "source": [ + "# Load pre-trained classifier and corresponding tokenizer\n", + "# classifier = \"gtfintechlab/FOMC-RoBERTa\"\n", + "classifier = \"karoldobiczek/roberta-base_fomc\"\n", + "\n", + "println(\"loading\")\n", + "model = transformers.RobertaForSequenceClassification.from_pretrained(classifier).cuda()\n", + "println(\"model done\")\n", + "tokenizer = transformers.AutoTokenizer.from_pretrained(classifier)\n", + "println(\"tok done\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "d7692016-104c-402c-87c2-2f461d004aee", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "scorer done\n" + ] + } + ], + "source": [ + "scorer = transformers_interpret.SequenceClassificationExplainer(model, tokenizer, attribution_type=\"lig\")\n", + "println(\"scorer done\")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "f5eb6b90-e0e4-4d11-9136-e363e29cf1a7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Python: " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "torch = PythonCall.pyimport(\"torch\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "5841a543-b2c7-40d6-aa6e-a4e602937fa9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Python: SequenceClassifierOutput(loss=None, logits=tensor([[ 0.8820, -0.5574, -0.7120]], grad_fn=), hidden_states=None, attentions=None)" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "toks = tokenizer(texts[1, :].sentence)\n", + "model(input_ids=torch.Tensor(toks.input_ids).int().unsqueeze(0), attention_mask=torch.Tensor(toks.attention_mask).unsqueeze(0))" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "267d0d79-e8b4-41f6-8290-daf5a2d0f256", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + } + ], + "source": [ + "for i in 1:n\n", + " scorer(texts[i, :].sentence)\n", + " scorer.visualize(\"fomc_roberta_viz_\" * string(i) * \".html\")\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "161a51e5-e3cb-4ca7-bbe2-4e95bfbfc1a9", + "metadata": 
{}, + "outputs": [ + { + "data": { + "text/plain": [ + "get_attributions (generic function with 1 method)" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Returns a list of tuples containing the token at the first position \n", + "# and attribution score at second\n", + "function get_attributions(text, scorer)\n", + " \n", + " attribs = scorer(text, internal_batch_size=1)\n", + " attributions = pyconvert(Array{Tuple{String, Float64}}, attribs)\n", + " return attributions\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "409785d3-9953-426f-937f-b5ebaf9a6ea7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "data": { + "text/plain": [ + "34-element Vector{Tuple{String, Float64}}:\n", + " (\"\", 0.0)\n", + " (\"rem\", -0.09617849663936606)\n", + " (\"ained\", -0.29702715615956665)\n", + " (\"well\", 0.25551209840018674)\n", + " (\"below\", -0.4015987185377847)\n", + " (\"their\", -0.013975036770146217)\n", + " (\"levels\", -0.13794157805498133)\n", + " (\"at\", -0.002103001077563767)\n", + " (\"the\", 0.14926277592717438)\n", + " (\"beginning\", 0.08228264530334284)\n", + " (\"of\", 0.08006335674570937)\n", + " (\"the\", 0.24284418735078794)\n", + " (\"year\", 0.20003154332566433)\n", + " ⋮\n", + " (\"oil\", -0.06976564966318043)\n", + " (\"prices\", 0.011989646035101107)\n", + " (\"had\", -0.15355389676950998)\n", + " (\"been\", -0.10492307025848874)\n", + " (\"holding\", -0.07771222018418246)\n", + " (\"down\", -0.024919108109520634)\n", + " (\"consumer\", 0.09788758630638592)\n", + " (\"price\", 0.011979387105458176)\n", + " (\"inflation\", -0.09257150340664654)\n", + " (\".\", 0.15492799445950947)\n", + " (\"\", -0.3214473479860127)\n", + " (\"\", 0.0)" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "attributions = get_attributions(texts[1, :].sentence, scorer)" + ] + }, + { + "cell_type": "markdown", + "id": "b25039fe-329f-46e4-b308-33bc42c734ea", + "metadata": {}, + "source": [ + "### Mask the word attributions\n", + "This step prepares the text for the CMLM. The $K$ tokens with the highest attribution score are masked." 
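As a toy illustration (made-up scores, not notebook data), picking the positions to mask for a given fraction $K$ of the words reduces to a partial sort of the word-level scores:

```julia
# Toy example: with K = 50% of 4 words, the two highest-scoring positions
# are the ones that get replaced by [MASK].
scores = [0.3, -0.1, 0.5, 0.2]                  # word-level attribution scores
K = 0.5                                         # fraction of words to mask
n_mask = ceil(Int, K * length(scores))          # 2
idx_to_mask = partialsortperm(scores, 1:n_mask; rev = true)   # [3, 1]
```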
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "48278c99-7368-4eaa-b26f-1b91674fc514", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "cls = TrillionDollarWords.load_model(; output_hidden_states=true)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "08a37453-82d2-4319-a719-a21d6685c88f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "group_into_words (generic function with 1 method)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# The tokenizer returns tokens instead of words\n", + "# the tokens are grouped into words and max pooling is used to get the word attribution\n", + "function group_into_words(text, attributions, cls_tkr)\n", + " toks = decode(cls_tkr, encode(cls_tkr, text).token)\n", + " word_attributions = []\n", + " for (i, (dec_tok, attrib)) in enumerate(zip(toks, attributions))\n", + " if startswith(dec_tok, \"<\")\n", + " continue\n", + " elseif length(word_attributions) == 0 || startswith(dec_tok, \" \")\n", + " push!(word_attributions, ([i], [attrib[1]], [attrib[2]]))\n", + " else \n", + " last_processed = last(word_attributions)\n", + " push!(last_processed[1], i)\n", + " push!(last_processed[2], attrib[1])\n", + " push!(last_processed[3], attrib[2])\n", + " end\n", + " end\n", + " return word_attributions\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "02b63bfd-134c-44ce-b2de-67ee99067745", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "28-element Vector{Any}:\n", + " ([2, 3], [\"rem\", \"ained\"], [-0.09617849663936606, -0.29702715615956665])\n", + " ([4], [\"well\"], [0.25551209840018674])\n", + " ([5], [\"below\"], [-0.4015987185377847])\n", + " ([6], [\"their\"], [-0.013975036770146217])\n", + " ([7], [\"levels\"], [-0.13794157805498133])\n", + " ([8], [\"at\"], [-0.002103001077563767])\n", + " ([9], [\"the\"], [0.14926277592717438])\n", + " ([10], [\"beginning\"], [0.08228264530334284])\n", + " ([11], [\"of\"], [0.08006335674570937])\n", + " ([12], [\"the\"], [0.24284418735078794])\n", + " ([13, 14], [\"year\", \",\"], [0.20003154332566433, 0.13104142887853437])\n", + " ([15], [\"and\"], [0.17795123590947837])\n", + " ([16], [\"that\"], [0.177958452206897])\n", + " ⋮\n", + " ([20], [\"earlier\"], [-0.2643532357406369])\n", + " ([21], [\"declines\"], [0.14990056209456956])\n", + " ([22], [\"in\"], [-0.3258371366392156])\n", + " ([23], [\"oil\"], [-0.06976564966318043])\n", + " ([24], [\"prices\"], [0.011989646035101107])\n", + " ([25], [\"had\"], [-0.15355389676950998])\n", + " ([26], [\"been\"], [-0.10492307025848874])\n", + " ([27], [\"holding\"], [-0.07771222018418246])\n", + " ([28], [\"down\"], [-0.024919108109520634])\n", + " ([29], [\"consumer\"], [0.09788758630638592])\n", + " ([30], [\"price\"], [0.011979387105458176])\n", + " ([31, 32], [\"inflation\", \".\"], [-0.09257150340664654, 0.15492799445950947])" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "text = texts[1, :].sentence\n", + "word_attributions = group_into_words(text, attributions, cls.tkr)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "e513d132-c2c8-4947-8ea8-2bda508c99b5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "get_top_k_idx (generic function with 2 methods)" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + 
"# Returns a list of indexes of words with the highest attribution scores\n", + "function get_top_k_idx(attributions, k=10)\n", + " sorted = sort(attributions, by = x -> -maximum(x[3]))\n", + " idx_to_mask = []\n", + " for row in first(sorted, k)\n", + " append!(idx_to_mask, row[1])\n", + " end\n", + " return idx_to_mask\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ee80bad8-3641-43d1-8d6d-6dcd87e40f15", + "metadata": {}, + "outputs": [], + "source": [ + "idx_to_mask = get_top_k_idx(word_attributions)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "d2071233-269a-49f6-b9d3-5e87f5be6d9e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "mask_toks_at_idx (generic function with 1 method)" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Masks tokens (here words) at corresponding indexes and forms them into a string\n", + "function mask_toks_at_idx(toks, idx_to_mask)\n", + " masked_text = Vector{Char}()\n", + " for (i, token) in enumerate(toks)\n", + " if startswith(token, \"<\")\n", + " continue\n", + " elseif i in idx_to_mask\n", + " append!(masked_text, \" [MASK]\")\n", + " else\n", + " append!(masked_text, token)\n", + " end\n", + " end\n", + " \n", + " return String(masked_text)\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "04662dad-be82-4888-9c6f-c26093347e80", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\"remained [MASK] below their levels at [MASK] beginning of [MASK] [MASK] [MASK] [MASK] [MASK] weaker demand [MASK] earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK]\"" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "toks = decode(cls.tkr, encode(cls.tkr, text).token)\n", + "masked_text = mask_toks_at_idx(toks, idx_to_mask)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "a596f684-1b82-4823-987b-adc633545977", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(BertTextEncoder(\n", + "├─ TextTokenizer(MatchTokenization(WordPieceTokenization(bert_uncased_tokenizer, WordPiece(vocab_size = 30522, unk = [UNK], max_char = 100)), 5 patterns)),\n", + "├─ vocab = Vocab{String, SizedArray}(size = 30522, unk = [UNK], unki = 101),\n", + "├─ startsym = [CLS],\n", + "├─ endsym = [SEP],\n", + "├─ padsym = [PAD],\n", + "├─ trunc = 512,\n", + "└─ process = Pipelines:\n", + " ╰─ target[token] := TextEncodeBase.nestedcall(string_getvalue, source)\n", + " ╰─ target[token] := Transformers.TextEncoders.grouping_sentence(target.token)\n", + " ╰─ target[(token, segment)] := SequenceTemplate{String}([CLS]: Input[1]: [SEP]: (Input[2]: [SEP]:)...)(target.token)\n", + " ╰─ target[attention_mask] := (NeuralAttentionlib.LengthMask ∘ Transformers.TextEncoders.getlengths(512))(target.token)\n", + " ╰─ target[token] := TextEncodeBase.trunc_and_pad(512, [PAD], head, tail)(target.token)\n", + " ╰─ target[token] := TextEncodeBase.nested2batch(target.token)\n", + " ╰─ target[segment] := TextEncodeBase.trunc_and_pad(512, 1, head, tail)(target.segment)\n", + " ╰─ target[segment] := TextEncodeBase.nested2batch(target.segment)\n", + " ╰─ target := (target.token, target.segment, target.attention_mask)\n", + "), HGFBertForMaskedLM(HGFBertModel(Chain(CompositeEmbedding(token = Embed(768, 30522), position = ApplyEmbed(.+, FixedLenPositionEmbed(768, 512)), segment = 
ApplyEmbed(.+, Embed(768, 2), Transformers.HuggingFace.bert_ones_like)), DropoutLayer(LayerNorm(768, ϵ = 1.0e-12))), Transformer<12>(PostNormTransformerBlock(DropoutLayer(SelfAttention(MultiheadQKVAttenOp(head = 12, p = nothing), Fork<3>(Dense(W = (768, 768), b = true)), Dense(W = (768, 768), b = true))), LayerNorm(768, ϵ = 1.0e-12), DropoutLayer(Chain(Dense(σ = NNlib.gelu, W = (768, 3072), b = true), Dense(W = (3072, 768), b = true))), LayerNorm(768, ϵ = 1.0e-12))), nothing), Branch{(:logit,) = (:hidden_state,)}(Chain(Dense(σ = NNlib.gelu, W = (768, 768), b = true), LayerNorm(768, ϵ = 1.0e-12), EmbedDecoder(Embed(768, 30522), bias = true)))), Transformers.HuggingFace.HGFConfig{:bert, JSON3.Object{Vector{UInt8}, Vector{UInt64}}, Nothing}(:_name_or_path => \"bert-base-uncased\", :architectures => [\"BertForMaskedLM\"], :attention_probs_dropout_prob => 0.1, :classifier_dropout => nothing, :gradient_checkpointing => false, :hidden_act => \"gelu\", :hidden_dropout_prob => 0.1, :hidden_size => 768, :initializer_range => 0.02, :intermediate_size => 3072…))" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Loads the CMLM model from HuggingFace\n", + "function load_model(; kwrgs...)\n", + " model_name = \"karoldobiczek/relitc-FOMC-CMLM\"\n", + " tkr = Transformers.load_tokenizer(model_name)\n", + " cfg = Transformers.HuggingFace.HGFConfig(Transformers.load_config(model_name); kwrgs...)\n", + " mod = Transformers.load_model(model_name, \"ForMaskedLM\"; config = cfg)\n", + "\n", + " return tkr, mod, cfg\n", + "end\n", + "cmlm_tkr, cmlm_model = load_model()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "af3721c6-c528-4256-bb22-a4476a1e4568", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "32-element Vector{String}:\n", + " \"[CLS]\"\n", + " \"remained\"\n", + " \"[MASK]\"\n", + " \"below\"\n", + " \"their\"\n", + " \"levels\"\n", + " \"at\"\n", + " \"[MASK]\"\n", + " \"beginning\"\n", + " \"of\"\n", + " \"[MASK]\"\n", + " \"[MASK]\"\n", + " \"[MASK]\"\n", + " ⋮\n", + " \"in\"\n", + " \"oil\"\n", + " \"prices\"\n", + " \"had\"\n", + " \"been\"\n", + " \"holding\"\n", + " \"down\"\n", + " \"[MASK]\"\n", + " \"price\"\n", + " \"[MASK]\"\n", + " \"[MASK]\"\n", + " \"[SEP]\"" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cmlm_decoded = decode(cmlm_tkr, encode(cmlm_tkr, masked_text).token)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "6ee6f417-25b9-4775-b0fd-165750e0584f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "get_idx_cmlm (generic function with 1 method)" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Since the CMLM model and the classifier models' tokenizers differ\n", + "# we have to create a different masking for the CMLM tokenizer\n", + "function get_idx_cmlm(cmlm_decoded)\n", + " idx_to_mask = []\n", + " for (i, tok) in enumerate(cmlm_decoded)\n", + " if tok == \"[MASK]\"\n", + " push!(idx_to_mask, i)\n", + " end\n", + " end\n", + " return idx_to_mask\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "af757265-243c-450e-8af4-adcd11b03485", + "metadata": {}, + "outputs": [], + "source": [ + "idx_to_mask = get_idx_cmlm(cmlm_decoded)" + ] + }, + { + "cell_type": "markdown", + "id": "1faeb671-dead-4ba1-867f-39eaf990a507", + "metadata": {}, + "source": [ + "### Fill in masks" + 
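The uncertainty-guided variant below ranks the remaining masked positions by the entropy of the CMLM's predictive distribution at each position. A small helper illustrating just that criterion (a sketch built on the `softmax` and `entropy` already brought in via Flux and StatsBase, not the notebook's own function):

```julia
using Flux: softmax
using StatsBase: entropy

# Entropy of the CMLM's predictive distribution at one masked position:
# a low value means the model is confident there, so that position is filled first.
position_entropy(logits_col::AbstractVector) = entropy(softmax(logits_col))
```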
] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "ff76df2f-f5e5-40bf-9a37-a517ac17acc7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "merge_tokens (generic function with 2 methods)" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Merges a token list into a string, masking at specified indexes\n", + "function merge_tokens(tokens, idx_to_mask=[])\n", + " merged_text = Vector{Char}()\n", + " for (i, token) in enumerate(tokens)\n", + " if i in idx_to_mask\n", + " append!(merged_text, \" [MASK]\")\n", + " else\n", + " append!(merged_text, \" \" * token)\n", + " end\n", + " end\n", + " \n", + " return chop(String(merged_text), head=1, tail=0)\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "7263a950-f7d8-4b02-a071-5314e7ad2559", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "group_into_words (generic function with 3 methods)" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Merges the CMLM output token list into a string\n", + "function group_into_words(cmlm_out, delim=\"##\")\n", + " word_list = []\n", + " for token in cmlm_out\n", + " if startswith(delim, token) && length(word_list) != 0\n", + " last(word_list) = last(word_list) * chop(token, head=2, tail=0)\n", + " else \n", + " push(word_list, token)\n", + " end\n", + " end\n", + " return word_list\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "1ce0fbff-a40c-4e95-9489-c1f36478b29d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "left_to_right_filling (generic function with 1 method)" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Recursively fills in the tokens\n", + "# The function selects the lowest index from mask_position and uses the CMLM\n", + "# to fill in the predicted token at the given position\n", + "# Once the mask_position list is empty, the merged string is returned\n", + "function left_to_right_filling(tokens, mask_positions, model, tokenizer)\n", + " if length(mask_positions) == 0\n", + " return merge_tokens(tokens)\n", + " end\n", + "\n", + " masked_text = merge_tokens(tokens, mask_positions)\n", + " # println(masked_text)\n", + " \n", + " out = decode(cmlm_tkr, cmlm_model(encode(cmlm_tkr, masked_text)).logit)\n", + " \n", + " mask_positions = sort(mask_positions)\n", + " next_position = popfirst!(mask_positions)\n", + "\n", + " next_token = out[next_position+1]\n", + "\n", + " tokens[next_position] = next_token\n", + "\n", + " return left_to_right_filling(tokens, mask_positions, model, tokenizer)\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "2d3bd657-d656-462c-97b9-58b5dd923394", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\"[CLS] remained well below their levels at the beginning of august to alleviate concerns about weaker demand as earlier increases in oil prices had been holding down consumer price expectations . 
[SEP]\"" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "left_to_right_filling(copy(cmlm_decoded), idx_to_mask, cmlm_model, cmlm_tkr)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "c2a089f0-e814-43ba-8da6-e22e1f087a7c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "uncertainty_filling (generic function with 1 method)" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Recursively fills in the tokens using CMLM uncertainty\n", + "# The function selects the masked token with the lowest logit entropy\n", + "# and fills in the predicted token at the given position\n", + "# Once the mask_position list is empty, the merged string is returned\n", + "function uncertainty_filling(tokens, mask_positions, model, tokenizer)\n", + " if length(mask_positions) == 0\n", + " return merge_tokens(tokens)\n", + " end\n", + "\n", + " masked_text = merge_tokens(tokens, mask_positions)\n", + " # println(masked_text)\n", + "\n", + " logits = cmlm_model(encode(cmlm_tkr, masked_text)).logit\n", + " out = decode(cmlm_tkr, logits)\n", + "\n", + " probs = softmax(logits[:, mask_positions, :], dims=1)\n", + " \n", + " entrs = []\n", + " for i in 1:length(mask_positions)\n", + " push!(entrs, entropy(probs[:, i]))\n", + " end\n", + " \n", + " next_position = mask_positions[argmin(entrs)]\n", + " filter!(x -> x != next_position, mask_positions)\n", + " \n", + " next_token = out[next_position+1]\n", + "\n", + " tokens[next_position] = next_token\n", + " return uncertainty_filling(tokens, mask_positions, model, tokenizer)\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "67ff3d94-59e9-4235-9285-4aeb3ba841ed", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\"[CLS] remained well below their levels at the beginning of august to help alleviate the weaker demand that earlier gains in oil prices had been holding down on price stability . 
[SEP]\"" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "uncertainty_filling(copy(cmlm_decoded), idx_to_mask, cmlm_model, cmlm_tkr)" + ] + }, + { + "cell_type": "markdown", + "id": "f3db316c-2cce-4163-9856-e12d69d577b4", + "metadata": {}, + "source": [ + "### Putting it all together" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6c6cc184-c8f7-4201-8886-f5da43785223", + "metadata": {}, + "outputs": [], + "source": [ + "n = 10\n", + "data = load_training_sentences()\n", + "texts = filter(:split => n -> n == \"test\", data)[1:n, :]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6a14a2e5-4dc6-4fb4-94ee-01cf6526550c", + "metadata": {}, + "outputs": [], + "source": [ + "cmlm_tkr, cmlm_model = load_model()\n", + "cls = TrillionDollarWords.load_model(; output_hidden_states=true)\n", + "\n", + "using PythonCall\n", + "\n", + "transformers_interpret = PythonCall.pyimport(\"transformers_interpret\")\n", + "transformers = PythonCall.pyimport(\"transformers\")\n", + "\n", + "# load pre-trained classifier and corresponding tokenizer\n", + "model = transformers.RobertaForSequenceClassification.from_pretrained(\"model\", local_files_only=true)\n", + "tokenizer = transformers.AutoTokenizer.from_pretrained(\"gtfintechlab/FOMC-RoBERTa\")\n", + "\n", + "scorer = transformers_interpret.SequenceClassificationExplainer(model, tokenizer, attribution_type=\"lig\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5fb7e883-bad7-47fe-b44b-930a961225ee", + "metadata": {}, + "outputs": [], + "source": [ + "attributions = get_attributions(texts[1, :].sentence, scorer)\n", + "\n", + "text = texts[1, :].sentence\n", + "word_attributions = group_into_words(text, attributions, cls.tkr)\n", + "idx_to_mask = get_top_k_idx(word_attributions)\n", + "\n", + "toks = decode(cls.tkr, encode(cls.tkr, text).token)\n", + "mask_toks_at_idx(toks, idx_to_mask)\n", + "\n", + "cmlm_decoded = decode(cmlm_tkr, encode(cmlm_tkr, masked_text).token)\n", + "\n", + "idx_to_mask = get_idx_cmlm(cmlm_decoded)\n", + "\n", + "left_to_right_filling(copy(cmlm_decoded), idx_to_mask, cmlm_model, cmlm_tkr)" + ] + }, + { + "cell_type": "markdown", + "id": "e944e49b-253c-4850-af4b-d483d870af84", + "metadata": {}, + "source": [ + "### Detour: Visualizing CMLM uncertainty through entropy" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "id": "7c0fde75-655e-48d1-8e26-c0553ff510c7", + "metadata": {}, + "outputs": [], + "source": [ + "using Plots" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "id": "ca6bef83-20a1-4897-a4ba-b4f0bb24e3b7", + "metadata": {}, + "outputs": [], + "source": [ + "enc = encode(cmlm_tkr, \"remained [MASK] below their levels at [MASK] beginning of [MASK] [MASK] [MASK] [MASK] [MASK] weaker demand [MASK] earlier [MASK] in oil prices had been holding down [MASK] price [MASK] [MASK]\")\n", + "out = cmlm_model(enc)\n", + "colors = []\n", + "masks = []\n", + "for i in 1:size(enc.token)[2]\n", + " if argmax(enc.token[:, i, :])[1] == 104\n", + " push!(masks, i)\n", + " push!(colors, :red)\n", + " else\n", + " push!(colors, :blue)\n", + " end\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 126, + "id": "65837706-437e-433f-8c22-f510e1addc7e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[\"well\"] 0.8925405\n", + "[\"the\"] 0.001739249\n", + "[\"august\"] 2.5585306\n", + "[\"quarter\"] 4.2470326\n", + 
"[\"to\"] 5.439382\n", + "[\"to\"] 5.78602\n", + "[\"of\"] 4.4509435\n", + "[\"and\"] 2.307382\n", + "[\"increases\"] 2.3528287\n", + "[\"the\"] 4.842994\n", + "[\"stability\"] 2.4602342\n", + "[\".\"] 2.0014744\n" + ] + } + ], + "source": [ + "entropies = []\n", + "toks = []\n", + "\n", + "probs = softmax(out.logit, dims=1)\n", + "\n", + "for i in 1:size(out.logit)[2]\n", + " row = out.logit[:, i, :]\n", + " tok = decode(cmlm_tkr, row)\n", + " push!(toks, tok)\n", + " entr = entropy(probs[:, i])\n", + " push!(entropies, entr)\n", + " if i in masks\n", + " println(string(tok) * \" \" * string(entr))\n", + " end\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 127, + "id": "7570a204-98e7-41dd-9b16-7949c822078f", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlgAAAGQCAIAAAD9V4nPAAAABmJLR0QA/wD/AP+gvaeTAAAgAElEQVR4nOzdZ1wUZ/c38GuBpSO9igKKiooFsAdQERRUbNhb7L1rbMmtJrYYSxJ7N/Zeomhs2IMEQcWCBTsIShUp22eeF/N3n3VhlxlcWGB/348v5GLOztllzp7pw6NpmgAAAOgqPW0nAAAAoE1ohAAAoNPQCAEAQKehEQIAgE5DIwQAAJ2GRggAADoNjRAAAHQaGiEAAOg0NEIAANBpaIQAAKDTyqkR5uXlLVy4kP30NE1zvfcbRVEckyIymawChtA0zfW9VKWPqxTvnWsIRVFcP66KuaiQUn1cVWZRQWWVaUhVqqwSlVMjTE9P379/P/vpJRKJWCzmNAuhUMj1z1ZYWMhp+vIJoShKKBRyChGLxVKplFNIYWEh10W2Yn5cUqmU66IiEom41lLFfO8URQkEAk4hqCxOIagsTiEVtrJKhF2jAACg09AIAQBAp6ERAgCATkMjBAAAnVbhGuGrV69CQwd5eLSuVatNx44DX7x4oe2MAACgKjPQdgJfefDgQUjIiIyMtTTdhhCSlna7TZt+Fy/uaNq0qbZTAwCAqqlibRGOGfNjevpfTBckhNB064yMfWPG/KjdrAAAoAqrWI3wzZv3hHh/PVb/7dsP2skGAAB0QMVqhITwtJ0AAADolop1jNDFxe7jxyRC6iiMvXJ2ttFaQgAAlVNOTk5YWJj8zjgURenpcdjyYe6Pw+Nx2DgpRQj7rHg83t69e728vNi/OHsVqxFu3PhLePiQzMztX3aQPrazG7Vp0+9aTgsAoLL59OlTcnLy6dOntZ2IZowfP/79+/c60QhbtWp5+fKWMWPmvn2bRgjPzc1py5ZNOGUUAKAUjIyM/Pz8tJ2FZlSrVq3sXrxiNUJCSJMmTf77L1IsFtM0bWRkpO10AACgiqtoJ8sAAACUKzRCAADQaWiEAACg09AIAQBAnVI8oLhyQSMEAIBiZGVlzZo1x9m5lpGREZ/P9/BouGzZ8rJ4QLzWVbizRgEAQOuePHkSGBiSl+clEv1BSFNCJG/exC5ZsnrHjn3//hvl5OSk2dklJib+/fffjx498vb2njdvnmZfvETYIgQAgK+IRKJOnbrn5AwQiS4R0o2QmoTUJmSAQBCTktKoR4/+Gp9jdHT0u3fvCgoKbt26pfEXLxEaIQAAfOXw4cOZmXoy2fIi9382EIu33L//8MaNG6V75bi4uG3btsl/jImJ2blzJyFk1KhRmzZtCggIKH3S3wCNEAAAvnLu3GWhsJeKY2eWNB16+fLl0r2ym5vbzJkzMzIymB9//vlnsVhc2jQ1Bo0QAAC+kpLykaZdVf1WLK6RnJxWule2t7fv0qXL3r17CSHv3r2Ljo4eOHBgKbPUHDRCAAD4irV1NUJyVf1WXz/H1tay1C8+fvz4rVu30jS9devWfv36lelNRFlCIwQAgK+0bdvSxOSSil9ShoZRrVq1KvWLBwYG8vn8q1ev7tq1a+zYsaV+HQ1CIwQAgK8MGTKEkHhCThT9FY/3u6WlrGvXrt/y+mPHjh0+fLizs3MFeTgGGiEAAHzF0dFx9+7tfP4QHm+Zwj7Sj/r6MwwNFx47ts/Y2PhbXn/IkCEZGRljxoyRjxw5csTGxmbp0qW3b9+2sbGZPHnyt7w+V7igHgAAlPXp08fBwWH8+BnPni00MalN01KB4HWLFu22bo3x9vb+xhdPTU01NjZWPE2mb9++ffv2/caXLTU0QgAAKEbbtm0TE+Nfvnz59OlTfX39xo0bu7i4fPvLrl27dufOndOmTTM3N//2V9MINEIAAFCpdu3atWvX1uAL2tjYLFq0KDw8XIOv+Y3QCAEAoPwMHjxY2ykow8kyAACg09AIAQBAp6ERAgCATkMjBAAAnYaTZQAAqiAjI6N3797Z2NhoOxHNyMvL+/nnn8voxdEIAQCqIBcXl+zsbJlMxvyYn5/P6bo9iURCUZSRkRH7EKFQaGBgYGDAoa1wysra2pr9K3OCRggAUDUpPtjBwMDAwsKCfaxEIpHJZJxupSYQCPh8PqdGyDWrMoJjhAAAoNPQCAEAQKehEQIAgE5DIwQAAJ2GRggAADqNcyPMzs4ePHiwo6NjREREbm5uyQEAAAAVGLfLJ4RCYadOnXr27Dlz5swpU6b8+uuvy5cvZxMok8ny8vI2b97M/Mjj8QICAurVq6dmepqm5VfAsJwFE8U1hP305RMi+4JTCE3TenocVmuYWfB4PK4h7Kcvn5DSfVx6enpV4L1TFFW6RQWVxT4ElcUppAJWFpuUuDXCEydO2Nvbz58/nxAye/bsixcvPnz4sHbt2qampuoDxWKxUCiMi4uTjzg4ONSqVUvV9BKJhOvyJ5FI9PX1uYZIJBL205dPiEwm4xoikUg4vXF5VpwW2Yr5cTFXO+nr63MKIYRw+mavmO+doqhSfFyoLE6zQGVxCiEVr7L4fH6J74JbI8zPz09OTmZuH7Bnz547d+5cvnw5Jyfn0qVLjRo1UhNoYmJib2+/fft2ljPS09OjaZrTTQ0oijI2NuZarpwuFy2fEGZ1klMIj8fT09Pj8/nsQ6RSqbGxMddyLYePi0mM/fT6+voyjpf90jTN9bLfirmoUBRF0zSnEFSWzlYW15CqVFkl4rayM3jwYCMjo/r16zdt2jQnJycxMTExMbF3796TJ0/WbFoAAADlg1sjNDU1jY2Nj
YqKatmy5YwZM0xNTXk8Xr9+/VJSUsooPwAAgDLF+axRPT29WrVqFRYWXrx4kRAik8m2bNkyZMiQMsgNAACgzJXypts//PBDcHDwf//9l5+f36hRox9//FGzaQEAAJSPUjZCX1/fR48enT171tPTs3379prNCQAAoNyU/jFMLi4uo0eP1mAqAAAA5Q+3WAMAAJ2GRggAADoNjRAAAHQaGiEAAOg0NEIAANBpaIQAAKDT0AgBAECnoRECAIBOQyMEAACdhkYIAAA6DY0QAAB0GhohAADoNDRCAADQaWiEAACg09AIAQBAp6ERAgCATkMjBAAAnYZGCAAAOg2NEAAAdBoaIQAA6DQ0QgAA0GlohAAAoNPQCAEAQKehEQIAgE5DIwQAAJ2GRggAADoNjRAAAHQaGiEAAOg0NEIAANBpaIQAAKDT0AgBAECnoRECAIBOQyMEAACdhkYIAAA6DY0QAAB0GhohAADoNDRCAADQaWiEAACg09AIAQBAp6ERAgCATkMjBAAAnYZGCAAAOg2NEAAAdBoaIQAA6DQ0QgAA0GlohAAAoNPQCAEAQKehEQIAgE6rOo0wLS3t8+fP2s4CAAAqmUrfCGma/vPPLc7OTdq0mVavXq+GDdvFxcVpOykAAKg0DLSdwLf6+edVa9a8ysuLIcSEEJKe/qJz54HXr++uX7++tlMDAIBKoHJvEUokkk2b9uXlrWW6ICGEEM+MjFXz56/WZloAAFB5VO5GmJyczOPVI4T/9fB3CQmPtJMQAABUNpW7EZqYmBCSX2S4wMjISAvZAABAJVS5G6Gzs7OZWTYhKYqDhob7e/bsqK2UAACgcqncjZAQ8tdfq+3tu/N4/xAiICTD1HRl7doHfvxxmrbzAgCAyqHSN8KAgO/u3v170KB/6tQJ8fX9/n//M0xIuGJmZqbtvAAAoHIozeUTsbGxe/fuXblypbGxscYTKgVXV9e9e9cWFhYaGxvr6VX61g4AAOWJc9u4ePFijx49AgMDDQ0NyyIhAACA8sR5i3DSpEk7d+4MDQ0lhGRlZVWrVo3P55cYVVBQkJqaGhQUJB/p37//gAEDVE0vFotpmhaLxewTEwgEEomE0xZhfn7RM061HyKTycRisUwmYx8iEon09PTY/CHkCgoKKIri8XjsQ8rn4yooKKBpmv30EolEJpNJJBL2IQKBgM/nGxhwWPgr5qJCUZRQKKQoin0IKktnK4trSJWpLGNj4xL/gtwaYV5e3suXL0NCQjIyMgYNGhQVFWViYrJs2bIpU6aoDzQ1NbWxsZk/f758pEmTJhYWFqqmZ8qV01UQ+vr6pdg1qiYHbYXIZDKRSGRqaso+xNDQkGu58ng8MzMzTuVKyuXj4vF45ubm7KdnypXTXnoDAwOu5Uoq5KJCUZSBgQGnI+KoLJ2tLK4hVayy1OOWsbm5uZWV1e3bt7du3RoSEvLPP/9cvHhxwIABDg4O/fv3VxPI4/GMjY2Dg4O/LVsAAAAN47aWx+Pxxo8fP3HixHv37v3www/6+vphYWGLFy/evn17GeUHAABQpjifLLNw4UILC4tHjx5FR0czI87OzriTC1RVRw8ebOPp2a527WbVq08eMiQ7O1vbGQGAhnE+WYbP50dGRg4cOLBHjx6rVq2ytraeN28etgihSvr1p5+erF8fmZtrQwgh5NihQ51u3br68CGno5gAUMGV5jpCKyurM2fOHDx4cPfu3Xp6ehs2bGjbtq3GMwPQrs+fPx/dujU2N1f/y0hvqfRDauq2deumz5unzcwAQKNKefm5vr7+4MGDL126dOHChY4dcWNPqIISEhICZDL9rwfDxOLb589rJyEAKBu4DwtA8Xg8XtEL9ChCeLh7EUDVgpIGKJ6Pj89NfX3p14NnjI0DunbVTkIAUDbQCAGKZ2ZmNnz69H5WVmmEEEIoQnYZGh5xdR05YYKWMwMAjSrNyTIAOmLKvHl1mzQZ+MMPeVlZekZGweHhl3/91cTERNt5AYAmoRECqBPauXNo5855eXkav6sTAFQQ2DUKAAA6DY0QAAB0Ghoh6JZPnz7dvn377t27QqFQ27kAQIWAY4SgKyiK+nnWrPP797eUSkV6encMDGYtXz5w2DBt5wUAWoZGCLpiyZw5kq1bbxcUMLtB8gnpN2OGnZNTx9BQLWcGAFqFXaOgEyiKOr5nz+IvXZAQYk7I+pyctQsWaDMtAKgA0AhBJ2RnZ7vweEo3DvUg5GNamnYSAoAKA40QdIK5uXkOTSsNFhJiYGiolXwAoOJAIwSdYGxs7FC7dszXg9uMjcP799dOQgBQYeBkGdAVaw8c6N2u3cCPH0OEQiEhBywtn3l5Hf/pJ23nBQBahkYIusLd3f3W06c7Nm5ceemSqZlZSN++a/r04fF42s4LALQMjRB0iLGx8cQZM0aMH8/n8w0MsPADACE4RggAADoOjRAAAHQaGiEAAOg0NEIAANBpaIQAAKDT0AgBAECnoRECAIBOQyMEAACdhkYIAAA6DY0QAAB0GhohAEAVJxaLMzMztZ1FxYVGCABQZT1//rxrq1btatSY0KpVE2fn9StXUhSl7aQqHNx3GACgakpNTe3ftu32Dx98CSGECAiZ98svi96//+WPP7ScWQWDLUIAgKpp3dKl89PTfb/8aELImvz8s/v35+fnazOtigeNEACgarp/+3bg1ztC9QhpQciTJ0+0lVLFhEYIAFA1GRkbC4oMFvJ4RkZGWsimAkMjBAComoIjIg6bmCiO5BKSYGDQsGFDbaVUMaERAgBUTaMnTTrn6bnCzCyHEBkhMYR0trX9Zd06fX19badWseCsUQCAqsnIyOhSfPzGVav6HjyY+/lzA2/v7StX1q9fX9t5VThohAAAVRafz586b97UefPy8vIsLCy0nU4FhV2jAACg09AIAQBAp6ERAgCATkMjBAAAnYZGCKBh586cCW/Zsl39+mF+fkcPHtR2OgBQApw1CqBJ8yZOTDlwYN2nT+6EpL5/v3j8+MunTm05fFjbeQGAStgiBNCYpKSkuCNH9nz65E4IIcSFkE25udlRUbGxsdpNDADUQCME0JhrV670ysnhfT3YNysrKjJSOwkBAAtohAAaIxIKjYs89dSYEFFBgVbyAQA20AgBNMa3efNr1tZKg1erVfNr21Yr+QAAG2iEABrTpk2brHr1NhsayrcKDxgY3K1Zs3OXLtpMCwDUQiOEyurt27eDw8Jaubt/5+HR/bvvHjx4oO2MCCHk8KVLb0eN8rG3D3Vw8HVwiBkw4NTNm7jZP0BFhssnoFJ69uzZwLZt/0hPD6BpQsjDDx/GBAWtOHYssF077SZmZma2fMOG5Rs2vH792t3dncfjlRwDAFqFLUKolBZNnrzh40emCxJCGhFyNCvrx3HjtJuVIjs7O3RBgEoBjRAqpeePH7f6esSVEGlOjlgs1k5CAFBpoRFCFULT9JdtRAAAltAIoVLy9PKK+3okjRB9KysjIyPtJAQAlRYaIVRKC9etG+vg8N+XH58R0sfW9peNG7WZEwBUTjhrFCqlBg0aHLp1a/7Ysa+fPCE0
[remainder of base64-encoded PNG output omitted: scatter plot of CMLM logit entropies per masked token]",
+      "image/svg+xml": [
+       "[SVG rendering of the same scatter plot omitted]"
+      ],
+      "text/html": [
+       "[HTML rendering of the same scatter plot omitted]"
+      ]
+     },
+     "execution_count": 127,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "masked_toks = toks\n",
+    "scatter(entropies, markercolor=colors, xticks = (1:size(masked_toks)[1], masked_toks), rot=45)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c564a9bd-ff84-40c3-b4bd-3c301bb8cfa7",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Julia 1.10.2",
+   "language": "julia",
+   "name": "julia-1.10"
+  },
+  "language_info": {
+   "file_extension": ".jl",
+   "mimetype": "application/julia",
+   "name": "julia",
+   "version": "1.10.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/dev/notebooks/RELITC_training.ipynb b/dev/notebooks/RELITC_training.ipynb
new file mode 100644
index 000000000..7b804c322
--- /dev/null
+++ b/dev/notebooks/RELITC_training.ipynb
@@ -0,0 +1,176 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "098c3a0a-8fc0-44da-ab50-fe289ef9f56f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "using Pkg"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "88746cde-6260-40cd-92c3-0316deced701",
+
"metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[32m\u001b[1mStatus\u001b[22m\u001b[39m `C:\\Users\\drobi\\Desktop\\uni\\master_thesis\\CounterfactualExplanations.jl\\dev\\notebooks\\Project.toml`\n", + " \u001b[90m[438e738f] \u001b[39mPyCall v1.96.4\n", + " \u001b[90m[21ca0261] \u001b[39mTransformers v0.2.8\n", + " \u001b[90m[d66529d5] \u001b[39mTrillionDollarWords v0.1.0\n" + ] + } + ], + "source": [ + "Pkg.status()" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "d12f0762-09fa-4013-b52a-466e30028eef", + "metadata": {}, + "outputs": [], + "source": [ + "import Transformers\n", + "using Transformers.TextEncoders\n", + "using Transformers.HuggingFace" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "66f296ec-2419-43db-bd2a-5a62de2eafeb", + "metadata": {}, + "outputs": [], + "source": [ + "using TrillionDollarWords " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7eb64489-f40e-4dbe-b447-0e87d8e20104", + "metadata": {}, + "outputs": [], + "source": [ + "cls = TrillionDollarWords.load_model(; output_hidden_states=true)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "c97eb28b-1ad1-416b-a97b-8a4c02ed1a62", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
2×8 DataFrame
[stripped HTML rendering of the DataFrame omitted; the same 2×8 preview (sentence_id, doc_id, date, event_type, label, sentence, score, speaker) appears in the text/latex and text/plain outputs below]
" + ], + "text/latex": [ + "\\begin{tabular}{r|ccccccc}\n", + "\t& sentence\\_id & doc\\_id & date & event\\_type & label & sentence & \\\\\n", + "\t\\hline\n", + "\t& Int64 & Int64 & Date & String31 & String7 & String & \\\\\n", + "\t\\hline\n", + "\t1 & 1 & 1 & 1996-01-30 & meeting minutes & neutral & The Committee then turned to a discussion of the economic and financial outlook, the ranges for the growth of money and debt in 1996, and the implementation of monetary policy over the intermeeting period ahead. & $\\dots$ \\\\\n", + "\t2 & 2 & 1 & 1996-01-30 & meeting minutes & neutral & Consumer spending had expanded modestly on balance, growth in business invest- ment in capital goods appeared to have slackened somewhat recently, and housing demand seemed to have leveled out. & $\\dots$ \\\\\n", + "\\end{tabular}\n" + ], + "text/plain": [ + "\u001b[1m2×8 DataFrame\u001b[0m\n", + "\u001b[1m Row \u001b[0m│\u001b[1m sentence_id \u001b[0m\u001b[1m doc_id \u001b[0m\u001b[1m date \u001b[0m\u001b[1m event_type \u001b[0m\u001b[1m label \u001b[0m\u001b[1m sentence \u001b[0m ⋯\n", + " │\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Date \u001b[0m\u001b[90m String31 \u001b[0m\u001b[90m String7 \u001b[0m\u001b[90m String \u001b[0m ⋯\n", + "─────┼──────────────────────────────────────────────────────────────────────────\n", + " 1 │ 1 1 1996-01-30 meeting minutes neutral The Committe ⋯\n", + " 2 │ 2 1 1996-01-30 meeting minutes neutral Consumer spe\n", + "\u001b[36m 3 columns omitted\u001b[0m" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = load_all_sentences()\n", + "n = 2\n", + "queries = df[1:n, :]" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "667a0a96-53ae-4aec-8ec7-14013291ec0e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(hidden_state = Float32[-0.6870835 -0.24942288 … -0.0025752056 -0.0017432718; 0.6610133 0.31153536 … 0.013969824 0.0137721775; … ; -0.8090527 -1.3571459 … -0.17556852 -0.17569499; -0.5847161 -1.2344005 … -0.044540238 -0.04152311;;;], logit = Float32[-3.221773; -3.1708598; 6.291086;;])" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "logits = cls([queries[1, :].sentence])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f9e9713-ddb3-4f79-841b-100c9a09328a", + "metadata": {}, + "outputs": [], + "source": [ + "Transformers.HuggingFace.save_model(\"Julia_FOMC\", cls.mod; path = pwd(), weight_name = \"weight\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "e0b2bb70-d889-4f73-92c0-a8719a4aa6a3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\"C:\\\\Users\\\\drobi\\\\Desktop\\\\uni\\\\master_thesis\\\\CounterfactualExplanations.jl\\\\dev\\\\notebooks\\\\Julia_FOMC\\\\config.json\"" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Transformers.HuggingFace.save_config(\"Julia_FOMC\", cls.cfg; path = pwd(), config_name = \"config.json\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Julia 1.10.2", + "language": "julia", + "name": "julia-1.10" + }, + "language_info": { + "file_extension": ".jl", + "mimetype": "application/julia", + "name": "julia", + "version": "1.10.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/dev/notebooks/importances.py b/dev/notebooks/importances.py new file mode 100644 index 000000000..f47dacd25 
--- /dev/null +++ b/dev/notebooks/importances.py @@ -0,0 +1,18 @@ +import os +import json + +from transformers import AutoTokenizer, RobertaForSequenceClassification +from transformers_interpret import SequenceClassificationExplainer + +def extract_importances(model_path, input_strings): + model = RobertaForSequenceClassification.from_pretrained(model_path, local_files_only=True) + tokenizer = AutoTokenizer.from_pretrained("gtfintechlab/FOMC-RoBERTa") + + scorer = SequenceClassificationExplainer(model, tokenizer, attribution_type='lig') + + attributions = [] + for t in input_strings: + attributions.append(scorer(t, index=0, internal_batch_size=1)) + + with open('temp/attributions.json', 'w') as f: + f.write(json.dumps(attributions)) \ No newline at end of file diff --git a/dev/notebooks/word_attributions.ipynb b/dev/notebooks/word_attributions.ipynb new file mode 100644 index 000000000..65e5b25e4 --- /dev/null +++ b/dev/notebooks/word_attributions.ipynb @@ -0,0 +1,106 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 21, + "id": "255175ec-037e-4238-9db3-24c24d324544", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import json\n", + "import transformers\n", + "import transformers_interpret\n", + "\n", + "from transformers import AutoTokenizer, RobertaForSequenceClassification\n", + "from transformers_interpret import SequenceClassificationExplainer" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "558306bd", + "metadata": {}, + "outputs": [], + "source": [ + "PATH = \"C:/Users/drobi/Desktop/uni/master_thesis/CounterfactualExplanations.jl/dev/notebooks/model\"\n", + "\n", + "input_strings = [\"dict, it contains the list of tokens and the list of feature importances\",\n", + " \"we should invest in the new technologies to increase our chances in the following term\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "a713410b", + "metadata": {}, + "outputs": [], + "source": [ + "model = RobertaForSequenceClassification.from_pretrained(PATH, local_files_only=True)\n", + "tokenizer = AutoTokenizer.from_pretrained(\"gtfintechlab/FOMC-RoBERTa\")" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "99fe89ef", + "metadata": {}, + "outputs": [], + "source": [ + "scorer = SequenceClassificationExplainer(model, tokenizer, attribution_type='lig')" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "164b6931", + "metadata": {}, + "outputs": [], + "source": [ + "attributions = []\n", + "for t in input_strings:\n", + " attributions.append(scorer(t, index=0, internal_batch_size=1))" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "e7ef3cfc", + "metadata": {}, + "outputs": [], + "source": [ + "with open('temp/attributions.json', 'w') as f:\n", + " f.write(json.dumps(attributions))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "352a1979", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "relitc", + "language": "python", + "name": "relitc" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/CounterfactualExplanations.jl b/src/CounterfactualExplanations.jl index 2a03dbd9f..22a403827 100755 --- 
a/src/CounterfactualExplanations.jl
+++ b/src/CounterfactualExplanations.jl
@@ -87,7 +87,7 @@ using .Convergence
 # argmin
 ###
 include("counterfactuals/Counterfactuals.jl")
-export CounterfactualExplanation
+export CounterfactualExplanation, TextCounterfactualExplanation
 export generate_counterfactual
 export update!
 export total_steps, converged, terminated, path, target_probs
diff --git a/src/counterfactuals/Counterfactuals.jl b/src/counterfactuals/Counterfactuals.jl
index 76d122672..f85d9665a 100644
--- a/src/counterfactuals/Counterfactuals.jl
+++ b/src/counterfactuals/Counterfactuals.jl
@@ -21,5 +21,6 @@ include("path_tracking.jl")
 include("printing.jl")
 include("search.jl")
 include("termination.jl")
+include("text_counterfactual.jl")
 include("utils.jl")
 include("vectorised.jl")
diff --git a/src/counterfactuals/text_counterfactual.jl b/src/counterfactuals/text_counterfactual.jl
new file mode 100644
index 000000000..c2056b4ff
--- /dev/null
+++ b/src/counterfactuals/text_counterfactual.jl
@@ -0,0 +1,46 @@
+"""
+A placeholder struct that collects all information relevant to a specific text counterfactual explanation.
+"""
+mutable struct TextCounterfactualExplanation <: AbstractCounterfactualExplanation
+    x::AbstractArray
+    target::RawTargetType
+    target_encoded::EncodedTargetType
+    x′::AbstractArray
+    M
+    generator::Generators.AbstractGenerator
+    num_counterfactuals::Int
+end
+
+"""
+    TextCounterfactualExplanation(
+        x::AbstractArray,
+        target::RawTargetType,
+        target_encoded::EncodedTargetType,
+        M,
+        generator::Generators.AbstractGenerator;
+        num_counterfactuals::Int=1,
+    )
+
+Outer method to construct a `TextCounterfactualExplanation` structure. The encoded
+target is passed in explicitly, since text models do not go through the tabular data
+encoders used elsewhere in the package. The counterfactual `x′` is initialised to the
+factual input `x`.
+"""
+function TextCounterfactualExplanation(
+    x::AbstractArray,
+    target::RawTargetType,
+    target_encoded::EncodedTargetType,
+    M,
+    generator::Generators.AbstractGenerator;
+    num_counterfactuals::Int=1,
+)
+    ce = TextCounterfactualExplanation(
+        x,
+        target,
+        target_encoded,
+        x,
+        M,
+        deepcopy(generator),
+        num_counterfactuals,
+    )
+    return ce
+end
diff --git a/src/generators/Generators.jl b/src/generators/Generators.jl
index cb0f0d339..3fcc91e09 100755
--- a/src/generators/Generators.jl
+++ b/src/generators/Generators.jl
@@ -16,6 +16,7 @@ using DataFrames
 using MLJBase
 using MLJDecisionTreeInterface
 using Distributions
+using PythonCall
 using Random
 using Statistics
@@ -61,6 +62,7 @@ include("non_gradient_based/base.jl")
 include("non_gradient_based/feature_tweak/generate_perturbations.jl")
 include("non_gradient_based/growing_spheres/growing_spheres.jl")
+include("non_gradient_based/relitc/relitc.jl")
 
 "A dictionary containing the constructors of all available counterfactual generators."
 generator_catalogue = Dict(
diff --git a/src/generators/non_gradient_based/relitc/get_attributions.jl b/src/generators/non_gradient_based/relitc/get_attributions.jl
new file mode 100644
index 000000000..25d229750
--- /dev/null
+++ b/src/generators/non_gradient_based/relitc/get_attributions.jl
@@ -0,0 +1,18 @@
+# Loads the transformers-interpret explainer for the FOMC classifier via PythonCall.
+function load_scorer()
+    transformers_interpret = PythonCall.pyimport("transformers_interpret")
+    transformers = PythonCall.pyimport("transformers")
+
+    # load the pre-trained classifier (from the local `model` directory) and its tokenizer
+    model = transformers.RobertaForSequenceClassification.from_pretrained("model", local_files_only=true)
+    tokenizer = transformers.AutoTokenizer.from_pretrained("gtfintechlab/FOMC-RoBERTa")
+
+    # layer integrated gradients ("lig") attributions
+    scorer = transformers_interpret.SequenceClassificationExplainer(model, tokenizer, attribution_type="lig")
+
+    return scorer
+end
+
+# Runs the explainer on `text` and converts the result to a list of (token, score) tuples.
+function get_attributions(text, scorer)
+    attribs = scorer(text, index=0, internal_batch_size=1)
+    attributions = pyconvert(Array{Tuple{String, Float64}}, attribs)
+    return attributions
+end
\ No newline at end of file
diff --git a/src/generators/non_gradient_based/relitc/relitc.jl b/src/generators/non_gradient_based/relitc/relitc.jl
new file mode 100644
index 000000000..965cd2fac
--- /dev/null
+++ b/src/generators/non_gradient_based/relitc/relitc.jl
@@ -0,0 +1,207 @@
+include("get_attributions.jl")
+
+# [Relevance-based Infilling for Natural Language Counterfactuals](https://dl.acm.org/doi/10.1145/3583780.3615029)
+# (RELITC) is a counterfactual explanation method for language-model classifiers. It uses LM feature
+# attributions to identify the tokens in the original text that contribute most to the classification.
+# Once identified, those tokens are masked and a Conditional Masked Language Model (CMLM), such as BERT,
+# fills in the masks, producing a new text that should be classified as the target class. RELITC
+# additionally tries to quantify the uncertainty of the CMLM to guide the infilling process. The only
+# parameter of the method, the percentage K of masked tokens, is established through beam search.
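+
+# To make the pipeline concrete before the implementation, a hedged walk-through on a toy
+# input (the attribution scores and the infilled words below are made up, not real model output):
+#
+#   text         = "rates will rise sharply next quarter"
+#   attributions = [("rates", 0.10), ("will", 0.02), ("rise", 0.81), ("sharply", 0.74), ...]
+#   # masking the highest-attribution words gives the CMLM input
+#   masked_text  = "rates will [MASK] [MASK] next quarter"
+#   # the CMLM then fills the masks one position at a time (left to right, or lowest-entropy
+#   # first), e.g. yielding "rates will stay flat next quarter", which is the candidate
+#   # counterfactual to be re-scored against the target class.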
+
+# Holds the RELITC components: the mask-filling scheme, the Python attribution explainer
+# (via PythonCall) and the (tokenizer, model) pair of the conditional masked LM.
+mutable struct RelitcGenerator <: AbstractNonGradientBasedGenerator
+    filling_scheme::Union{Nothing, Function}
+    attribution_generator::Py
+    cmlm
+end
+
+function RelitcGenerator(;
+    filling_scheme::Union{Nothing, Function}=uncertainty_filling,
+    attribution_generator::Py=load_scorer(),
+    cmlm=load_fomc_cmlm(),
+)
+    return RelitcGenerator(filling_scheme, attribution_generator, cmlm)
+end
+
+# Generate a counterfactual using RELITC (to be dispatched on text counterfactuals)
+function relitc!(ce::AbstractCounterfactualExplanation)
+    # the factual text (the explanation may store it directly or as a single-element array)
+    text = ce.x isa AbstractString ? ce.x : first(ce.x)
+    scorer = ce.generator.attribution_generator
+    attributions = get_attributions(text, scorer)
+    cls_tkr = ce.M[1]
+    cls_mod = ce.M[2]
+
+    cmlm_tkr = ce.generator.cmlm[1]
+    cmlm_mod = ce.generator.cmlm[2]
+
+    word_attributions = group_into_words(text, attributions, cls_tkr)
+    idx_to_mask = get_top_k_idx(word_attributions)
+
+    toks = decode(cls_tkr, encode(cls_tkr, text).token)
+    masked_text = mask_toks_at_idx(toks, idx_to_mask)
+
+    # re-tokenize the masked text with the CMLM tokenizer and locate the [MASK] positions
+    cmlm_decoded = decode(cmlm_tkr, encode(cmlm_tkr, masked_text).token)
+    idx_to_mask = get_idx_cmlm(cmlm_decoded)
+
+    # fall back to left-to-right filling if no filling scheme is configured
+    fill_masks = isnothing(ce.generator.filling_scheme) ? left_to_right_filling : ce.generator.filling_scheme
+    return fill_masks(copy(cmlm_decoded), idx_to_mask, cmlm_mod, cmlm_tkr)
+end
+
+# The classifier tokenizer returns sub-word tokens rather than words. This groups the tokens
+# into words so that max pooling over each word's token attributions can be used to rank
+# words (see `get_top_k_idx`).
+function group_into_words(text, attributions, cls_tkr)
+    toks = decode(cls_tkr, encode(cls_tkr, text).token)
+    word_attributions = []
+    for (i, (dec_tok, attrib)) in enumerate(zip(toks, attributions))
+        if startswith(dec_tok, "<")
+            # skip special tokens such as <s> and </s>
+            continue
+        elseif length(word_attributions) == 0 || startswith(dec_tok, " ")
+            push!(word_attributions, ([i], [attrib[1]], [attrib[2]]))
+        else
+            last_processed = last(word_attributions)
+            push!(last_processed[1], i)
+            push!(last_processed[2], attrib[1])
+            push!(last_processed[3], attrib[2])
+        end
+    end
+    return word_attributions
+end
+
+# Returns the token indexes of the `k` words with the highest (max-pooled) attribution scores
+function get_top_k_idx(attributions, k=10)
+    sorted = sort(attributions, by = x -> -maximum(x[3]))
+    idx_to_mask = []
+    for row in first(sorted, k)
+        append!(idx_to_mask, row[1])
+    end
+    return idx_to_mask
+end
+
+# Masks the tokens at the given indexes and joins everything back into a string
+function mask_toks_at_idx(toks, idx_to_mask)
+    masked_text = Vector{Char}()
+    for (i, token) in enumerate(toks)
+        if startswith(token, "<")
+            continue
+        elseif i in idx_to_mask
+            append!(masked_text, " [MASK]")
+        else
+            append!(masked_text, token)
+        end
+    end
+
+    return String(masked_text)
+end
+
+# The CMLM and the classifier use different tokenizers, so the [MASK] positions have to be
+# located again in the CMLM tokenization
+function get_idx_cmlm(cmlm_decoded)
+    idx_to_mask = []
+    for (i, tok) in enumerate(cmlm_decoded)
+        if tok == "[MASK]"
+            push!(idx_to_mask, i)
+        end
+    end
+    return idx_to_mask
+end
+
+# Merges a token list into a string, masking at the specified indexes
+function merge_tokens(tokens, idx_to_mask=[])
+    merged_text = Vector{Char}()
+    for (i, token) in enumerate(tokens)
+        if i in idx_to_mask
+            append!(merged_text, " [MASK]")
+        else
+            append!(merged_text, " " * token)
+        end
+    end
+
+    return chop(String(merged_text), head=1, tail=0)
+end
+
+# Groups the CMLM output token list back into words
+# (WordPiece continuation tokens start with `##`)
+function group_into_words(cmlm_out, delim="##")
+    word_list = []
+    for token in cmlm_out
+        if startswith(token, delim) && length(word_list) != 0
+            word_list[end] = word_list[end] * chop(token, head=length(delim), tail=0)
+        else
+            push!(word_list, token)
+        end
+    end
+    return word_list
+end
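+
+# A small illustration of the helpers above (token lists are made up; BERT-style tokens):
+#
+#   merge_tokens(["the", "committee", "met"], [2])   # -> "the [MASK] met"
+#   group_into_words(["inf", "##lation", "rose"])    # -> ["inflation", "rose"] (eltype Any)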
+
+# Recursively fills in the masked tokens from left to right.
+# The function selects the lowest index in `mask_positions` and uses the CMLM
+# to fill in the predicted token at that position.
+# Once `mask_positions` is empty, the merged string is returned.
+function left_to_right_filling(tokens, mask_positions, model, tokenizer)
+    if length(mask_positions) == 0
+        return merge_tokens(tokens)
+    end
+
+    masked_text = merge_tokens(tokens, mask_positions)
+
+    out = decode(tokenizer, model(encode(tokenizer, masked_text)).logit)
+
+    mask_positions = sort(mask_positions)
+    next_position = popfirst!(mask_positions)
+
+    # re-encoding the merged string adds an extra [CLS] at the front, hence the offset
+    next_token = out[next_position + 1]
+
+    tokens[next_position] = next_token
+
+    return left_to_right_filling(tokens, mask_positions, model, tokenizer)
+end
+
+# Recursively fills in the masked tokens guided by CMLM uncertainty.
+# The function selects the masked position with the lowest predictive entropy
+# and fills in the predicted token at that position.
+# Once `mask_positions` is empty, the merged string is returned.
+function uncertainty_filling(tokens, mask_positions, model, tokenizer)
+    if length(mask_positions) == 0
+        return merge_tokens(tokens)
+    end
+
+    masked_text = merge_tokens(tokens, mask_positions)
+
+    logits = model(encode(tokenizer, masked_text)).logit
+    out = decode(tokenizer, logits)
+
+    probs = softmax(logits[:, mask_positions, :], dims=1)
+
+    entrs = []
+    for i in 1:length(mask_positions)
+        push!(entrs, entropy(probs[:, i]))
+    end
+
+    next_position = mask_positions[argmin(entrs)]
+    filter!(x -> x != next_position, mask_positions)
+
+    next_token = out[next_position + 1]
+
+    tokens[next_position] = next_token
+    return uncertainty_filling(tokens, mask_positions, model, tokenizer)
+end
\ No newline at end of file
diff --git a/src/models/Models.jl b/src/models/Models.jl
index 8b3232a01..6dfb786cb 100755
--- a/src/models/Models.jl
+++ b/src/models/Models.jl
@@ -2,16 +2,17 @@ module Models
 using ..CounterfactualExplanations
 using ..DataPreprocessing
-using Parameters
+using DataFrames
 using Flux
-using MLJBase
 using LazyArtifacts
-using Serialization
+using MLJBase
+using MLJDecisionTreeInterface
 using MLUtils
+using Parameters
 using ProgressMeter
+using Serialization
 using Statistics
-using DataFrames
-using MLJDecisionTreeInterface
+using Transformers
 
 include("utils.jl")
diff --git a/src/models/pretrained/fomc.jl b/src/models/pretrained/fomc.jl
new file mode 100644
index 000000000..4534223f7
--- /dev/null
+++ b/src/models/pretrained/fomc.jl
@@ -0,0 +1,21 @@
+# Loads the Trillion Dollar Words classifier (FOMC-RoBERTa) and its tokenizer
+function load_fomc_classifier(; kwrgs...)
+    model_name = "gtfintechlab/FOMC-RoBERTa"
+
+    tkr = Transformers.load_tokenizer(model_name)
+    cfg = Transformers.HuggingFace.HGFConfig(Transformers.load_config(model_name); kwrgs...)
+ mod = Transformers.load_model(model_name, "ForSequenceClassification"; config = cfg) + + return tkr, mod +end + +# Loads the CMLM model +function load_fomc_cmlm(; kwrgs...) + model_name = "karoldobiczek/relitc-FOMC-CMLM" + + tkr = Transformers.load_tokenizer(model_name) + cfg = Transformers.HuggingFace.HGFConfig(Transformers.load_config(model_name); kwrgs...) + mod = Transformers.load_model(model_name, "ForMaskedLM"; config = cfg) + + return tkr, mod +end diff --git a/src/models/pretrained/pretrained.jl b/src/models/pretrained/pretrained.jl index 42f5b07b1..aa8052383 100644 --- a/src/models/pretrained/pretrained.jl +++ b/src/models/pretrained/pretrained.jl @@ -1,5 +1,6 @@ vision_dir = CounterfactualExplanations.generate_artifact_dir("model-vision") +include("cifar_10.jl") include("fashion_mnist.jl") +include("fomc.jl") include("mnist.jl") -include("cifar_10.jl")
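
For orientation, a rough end-to-end sketch of how the pieces added above are intended to fit together. This is illustrative only and not part of the diff: the example sentence is made up, `load_scorer` expects a locally saved copy of the classifier in a `model` directory (see `RELITC_training.ipynb` and `word_attributions.ipynb`), and the HuggingFace weights plus the Python dependencies from `CondaPkg.toml` are fetched on first use.

    using CounterfactualExplanations
    using CounterfactualExplanations: Models, Generators

    # (tokenizer, model) pairs for the FOMC-RoBERTa classifier and the RELITC CMLM
    cls = Models.load_fomc_classifier()
    cmlm = Models.load_fomc_cmlm()

    # PythonCall wrapper around transformers-interpret (layer integrated gradients)
    scorer = Generators.load_scorer()
    attributions = Generators.get_attributions(
        "The Committee judges that inflation pressures have eased.", scorer
    )

    # bundle the attribution scorer, the CMLM and a filling scheme into a generator
    generator = Generators.RelitcGenerator(;
        filling_scheme=Generators.uncertainty_filling,
        attribution_generator=scorer,
        cmlm=cmlm,
    )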