diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1e73c80 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +Level 1/Intermediate/.jovianrc \ No newline at end of file diff --git a/Level 1/Intermediate/.ipynb_checkpoints/US Police Killings Analysis-checkpoint.ipynb b/Level 1/Intermediate/.ipynb_checkpoints/US Police Killings Analysis-checkpoint.ipynb new file mode 100644 index 0000000..a67c033 --- /dev/null +++ b/Level 1/Intermediate/.ipynb_checkpoints/US Police Killings Analysis-checkpoint.ipynb @@ -0,0 +1,5360 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- Import required libraries." + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "import warnings\n", + "warnings.filterwarnings('ignore')\n", + "\n", + "import re\n", + "import seaborn as sns\n", + "sns.set_style(\"whitegrid\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- Open and read the data sets." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": {}, + "outputs": [], + "source": [ + "income = pd.read_csv('datasets/MedianHouseholdIncome2015.csv', encoding='windows-1251')" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "metadata": {}, + "outputs": [], + "source": [ + "poverty = pd.read_csv('datasets/PercentagePeopleBelowPovertyLevel.csv', encoding='windows-1251')" + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "metadata": {}, + "outputs": [], + "source": [ + "education = pd.read_csv('datasets/PercentOver25CompletedHighSchool.csv', encoding='windows-1251')" + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "metadata": {}, + "outputs": [], + "source": [ + "killings = pd.read_csv('datasets/PoliceKillingsUS.csv', encoding='windows-1251')" + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "metadata": {}, + "outputs": [], + "source": [ + "city_race = pd.read_csv('datasets/ShareRaceByCity.csv', encoding='windows-1251')" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Geographic AreaCityMedian Income
0ALAbanda CDP11207
1ALAbbeville city25615
2ALAdamsville city42575
3ALAddison town37083
4ALAkron town21667
\n", + "
" + ], + "text/plain": [ + " Geographic Area City Median Income\n", + "0 AL Abanda CDP 11207\n", + "1 AL Abbeville city 25615\n", + "2 AL Adamsville city 42575\n", + "3 AL Addison town 37083\n", + "4 AL Akron town 21667" + ] + }, + "execution_count": 94, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "income.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "51" + ] + }, + "execution_count": 95, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "income['Geographic Area'].nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": {}, + "outputs": [], + "source": [ + "city_samp = city_race.sample(frac=0.25, replace=False, random_state=0, axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Int64Index: 7317 entries, 20414 to 22006\n", + "Data columns (total 7 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Geographic area 7317 non-null object\n", + " 1 City 7317 non-null object\n", + " 2 share_white 7317 non-null object\n", + " 3 share_black 7317 non-null object\n", + " 4 share_native_american 7317 non-null object\n", + " 5 share_asian 7317 non-null object\n", + " 6 share_hispanic 7317 non-null object\n", + "dtypes: object(7)\n", + "memory usage: 457.3+ KB\n" + ] + } + ], + "source": [ + "city_samp.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 98, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "city_samp.index.duplicated().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + 
"execution_count": 99, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 29322 entries, 0 to 29321\n", + "Data columns (total 3 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Geographic Area 29322 non-null object\n", + " 1 City 29322 non-null object\n", + " 2 Median Income 29271 non-null object\n", + "dtypes: object(3)\n", + "memory usage: 687.4+ KB\n" + ] + } + ], + "source": [ + "income.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Geographic AreaCitypoverty_rate
0ALAbanda CDP78.8
1ALAbbeville city29.1
2ALAdamsville city25.5
3ALAddison town30.7
4ALAkron town42
\n", + "
" + ], + "text/plain": [ + " Geographic Area City poverty_rate\n", + "0 AL Abanda CDP 78.8\n", + "1 AL Abbeville city 29.1\n", + "2 AL Adamsville city 25.5\n", + "3 AL Addison town 30.7\n", + "4 AL Akron town 42" + ] + }, + "execution_count": 100, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "poverty.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 29329 entries, 0 to 29328\n", + "Data columns (total 3 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Geographic Area 29329 non-null object\n", + " 1 City 29329 non-null object\n", + " 2 poverty_rate 29329 non-null object\n", + "dtypes: object(3)\n", + "memory usage: 687.5+ KB\n" + ] + } + ], + "source": [ + "poverty.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Geographic AreaCitypercent_completed_hs
0ALAbanda CDP21.2
1ALAbbeville city69.1
2ALAdamsville city78.9
3ALAddison town81.4
4ALAkron town68.6
\n", + "
" + ], + "text/plain": [ + " Geographic Area City percent_completed_hs\n", + "0 AL Abanda CDP 21.2\n", + "1 AL Abbeville city 69.1\n", + "2 AL Adamsville city 78.9\n", + "3 AL Addison town 81.4\n", + "4 AL Akron town 68.6" + ] + }, + "execution_count": 102, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "education.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 29329 entries, 0 to 29328\n", + "Data columns (total 3 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Geographic Area 29329 non-null object\n", + " 1 City 29329 non-null object\n", + " 2 percent_completed_hs 29329 non-null object\n", + "dtypes: object(3)\n", + "memory usage: 687.5+ KB\n" + ] + } + ], + "source": [ + "education.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamedatemanner_of_deatharmedagegenderracecitystatesigns_of_mental_illnessthreat_levelfleebody_camera
03Tim Elliot02/01/15shotgun53.0MASheltonWATrueattackNot fleeingFalse
14Lewis Lee Lembke02/01/15shotgun47.0MWAlohaORFalseattackNot fleeingFalse
25John Paul Quintero03/01/15shot and Taseredunarmed23.0MHWichitaKSFalseotherNot fleeingFalse
38Matthew Hoffman04/01/15shottoy weapon32.0MWSan FranciscoCATrueattackNot fleeingFalse
49Michael Rodriguez04/01/15shotnail gun39.0MHEvansCOFalseattackNot fleeingFalse
\n", + "
" + ], + "text/plain": [ + " id name date manner_of_death armed age \\\n", + "0 3 Tim Elliot 02/01/15 shot gun 53.0 \n", + "1 4 Lewis Lee Lembke 02/01/15 shot gun 47.0 \n", + "2 5 John Paul Quintero 03/01/15 shot and Tasered unarmed 23.0 \n", + "3 8 Matthew Hoffman 04/01/15 shot toy weapon 32.0 \n", + "4 9 Michael Rodriguez 04/01/15 shot nail gun 39.0 \n", + "\n", + " gender race city state signs_of_mental_illness threat_level \\\n", + "0 M A Shelton WA True attack \n", + "1 M W Aloha OR False attack \n", + "2 M H Wichita KS False other \n", + "3 M W San Francisco CA True attack \n", + "4 M H Evans CO False attack \n", + "\n", + " flee body_camera \n", + "0 Not fleeing False \n", + "1 Not fleeing False \n", + "2 Not fleeing False \n", + "3 Not fleeing False \n", + "4 Not fleeing False " + ] + }, + "execution_count": 104, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "killings.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 2535 entries, 0 to 2534\n", + "Data columns (total 14 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 id 2535 non-null int64 \n", + " 1 name 2535 non-null object \n", + " 2 date 2535 non-null object \n", + " 3 manner_of_death 2535 non-null object \n", + " 4 armed 2526 non-null object \n", + " 5 age 2458 non-null float64\n", + " 6 gender 2535 non-null object \n", + " 7 race 2340 non-null object \n", + " 8 city 2535 non-null object \n", + " 9 state 2535 non-null object \n", + " 10 signs_of_mental_illness 2535 non-null bool \n", + " 11 threat_level 2535 non-null object \n", + " 12 flee 2470 non-null object \n", + " 13 body_camera 2535 non-null bool \n", + "dtypes: bool(2), float64(1), int64(1), object(10)\n", + "memory usage: 242.7+ KB\n" + ] + } + ], + "source": [ + "killings.info()" + ] + }, + { + "cell_type": "code", + 
"execution_count": 106, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Geographic areaCityshare_whiteshare_blackshare_native_americanshare_asianshare_hispanic
0ALAbanda CDP67.230.2001.6
1ALAbbeville city54.441.40.113.1
2ALAdamsville city52.344.90.50.32.3
3ALAddison town99.10.100.10.4
4ALAkron town13.286.5000.3
\n", + "
" + ], + "text/plain": [ + " Geographic area City share_white share_black \\\n", + "0 AL Abanda CDP 67.2 30.2 \n", + "1 AL Abbeville city 54.4 41.4 \n", + "2 AL Adamsville city 52.3 44.9 \n", + "3 AL Addison town 99.1 0.1 \n", + "4 AL Akron town 13.2 86.5 \n", + "\n", + " share_native_american share_asian share_hispanic \n", + "0 0 0 1.6 \n", + "1 0.1 1 3.1 \n", + "2 0.5 0.3 2.3 \n", + "3 0 0.1 0.4 \n", + "4 0 0 0.3 " + ] + }, + "execution_count": 106, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "city_race.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 29268 entries, 0 to 29267\n", + "Data columns (total 7 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Geographic area 29268 non-null object\n", + " 1 City 29268 non-null object\n", + " 2 share_white 29268 non-null object\n", + " 3 share_black 29268 non-null object\n", + " 4 share_native_american 29268 non-null object\n", + " 5 share_asian 29268 non-null object\n", + " 6 share_hispanic 29268 non-null object\n", + "dtypes: object(7)\n", + "memory usage: 1.6+ MB\n" + ] + } + ], + "source": [ + "city_race.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- I will concatenate the `income`, `poverty`, `education` and `city_race` dataframes for compact analysis." + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [], + "source": [ + "data = pd.concat([poverty, education, income, city_race], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Geographic AreaCitypoverty_rateGeographic AreaCitypercent_completed_hsGeographic AreaCityMedian IncomeGeographic areaCityshare_whiteshare_blackshare_native_americanshare_asianshare_hispanic
26119TXTimpson city42.7TXTimpson city70.8TXTomball city44086TXVenus town79.413.20.51.724.8
20016OHPainesville city23OHPainesville city78.1OHParma city50440OHPleasant Run CDP79.814.50.51.82.2
15984NJEast Rutherford borough10.1NJEast Rutherford borough92.9NJEllisburg CDP61544NJFarmingdale borough89.62.90.53.26.9
14734MTCamas CDP48.6MTCamas CDP87.5MTCharlo CDP44583MTConrad city95.10.21.80.31.5
18451NCMarshville town28.9NCMarshville town71.1NCMaysville town24432NCMorganton city70.112.20.92.416.4
28940WIRichfield village2.8WIRichfield village95.8WIRiver Hills village156250WISpooner city95.10.31.90.71.3
1442ARBlue Eye town74.4ARBlue Eye town16.7ARBlue Eye town(X)ARBooneville city93.510.90.63.2
25763TXPoint Comfort city7.3TXPoint Comfort city89.2TXPortland city62561TXRamos CDP76.7000100
7761INLittle York town20.1INLittle York town77.8INLogansport city32982INLowell town95.90.50.40.36.9
1592ARGreers Ferry city13ARGreers Ferry city81.1ARGreers Ferry city31810ARHackett city920.13.40.60.6
\n", + "
" + ], + "text/plain": [ + " Geographic Area City poverty_rate Geographic Area \\\n", + "26119 TX Timpson city 42.7 TX \n", + "20016 OH Painesville city 23 OH \n", + "15984 NJ East Rutherford borough 10.1 NJ \n", + "14734 MT Camas CDP 48.6 MT \n", + "18451 NC Marshville town 28.9 NC \n", + "28940 WI Richfield village 2.8 WI \n", + "1442 AR Blue Eye town 74.4 AR \n", + "25763 TX Point Comfort city 7.3 TX \n", + "7761 IN Little York town 20.1 IN \n", + "1592 AR Greers Ferry city 13 AR \n", + "\n", + " City percent_completed_hs Geographic Area \\\n", + "26119 Timpson city 70.8 TX \n", + "20016 Painesville city 78.1 OH \n", + "15984 East Rutherford borough 92.9 NJ \n", + "14734 Camas CDP 87.5 MT \n", + "18451 Marshville town 71.1 NC \n", + "28940 Richfield village 95.8 WI \n", + "1442 Blue Eye town 16.7 AR \n", + "25763 Point Comfort city 89.2 TX \n", + "7761 Little York town 77.8 IN \n", + "1592 Greers Ferry city 81.1 AR \n", + "\n", + " City Median Income Geographic area City \\\n", + "26119 Tomball city 44086 TX Venus town \n", + "20016 Parma city 50440 OH Pleasant Run CDP \n", + "15984 Ellisburg CDP 61544 NJ Farmingdale borough \n", + "14734 Charlo CDP 44583 MT Conrad city \n", + "18451 Maysville town 24432 NC Morganton city \n", + "28940 River Hills village 156250 WI Spooner city \n", + "1442 Blue Eye town (X) AR Booneville city \n", + "25763 Portland city 62561 TX Ramos CDP \n", + "7761 Logansport city 32982 IN Lowell town \n", + "1592 Greers Ferry city 31810 AR Hackett city \n", + "\n", + " share_white share_black share_native_american share_asian share_hispanic \n", + "26119 79.4 13.2 0.5 1.7 24.8 \n", + "20016 79.8 14.5 0.5 1.8 2.2 \n", + "15984 89.6 2.9 0.5 3.2 6.9 \n", + "14734 95.1 0.2 1.8 0.3 1.5 \n", + "18451 70.1 12.2 0.9 2.4 16.4 \n", + "28940 95.1 0.3 1.9 0.7 1.3 \n", + "1442 93.5 1 0.9 0.6 3.2 \n", + "25763 76.7 0 0 0 100 \n", + "7761 95.9 0.5 0.4 0.3 6.9 \n", + "1592 92 0.1 3.4 0.6 0.6 " + ] + }, + "execution_count": 109, + "metadata": {}, + 
"output_type": "execute_result" + } + ], + "source": [ + "data.sample(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": {}, + "outputs": [], + "source": [ + "#rename the columns\n", + "data.columns = ['state', 'city', 'poverty_rate', 'Geographic_Area_x', 'City_x',\n", + " 'education', 'Geographic_Area_y', 'City_y', 'income',\n", + " 'Geographic_area_z', 'City_z', 'share_white', 'share_black',\n", + " 'share_native_american', 'share_asian', 'share_hispanic']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- The cities are not the same, but the areas(state) are. In this case I will perform my analysis based on geographic area." + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "metadata": {}, + "outputs": [], + "source": [ + "data.drop(['Geographic_Area_x', 'City_x', 'Geographic_Area_y', 'City_y', 'Geographic_area_z', 'City_z'], \n", + " axis=1, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
statecitypoverty_rateeducationincomeshare_whiteshare_blackshare_native_americanshare_asianshare_hispanic
18204NCDellview town01002361679.317.10.302.1
10302KYWhite Plains city14.482.35733494.11.6000
20058OHPlumwood CDP6.280.35342148.544.90.14.31.8
20849OKMulhall town28.689.4-88.2010.600
23114PAUtica borough20.584.34546998.60.30.10.41
21753PACentre Hall borough694.95375098.20.300.40.9
11271MDNanticoke Acres CDP52.6505766565.3230.45.55.9
12766MNHoffman city21.887.74500094.71.10.50.72.8
21915PAEagles Mere borough2.51005682795.71.20.21.50.8
4443FLHastings town26.381.836196970.70.40.44.3
2124CACamarillo city6.492.28815296.90006.2
14244MOMaplewood city19.691.52531396.60001.7
6547ILGrand Ridge village10.894.83906383.73.30.36.88.8
14122MOIronton city36.380.62375096.90000.6
4150FLArcher city35.590.6211465838100
5427GAMineral Bluff CDP251003309765.632.1003.7
5649GAWarwick city38.564.14300039.157.70.212.5
5573GASocial Circle city12.479.62432154.440.10.223
10038KYIndependence city8.489.82187598.40.300.30.3
28846WINiagara city20.393.28303698.300.401
\n", + "
" + ], + "text/plain": [ + " state city poverty_rate education income share_white \\\n", + "18204 NC Dellview town 0 100 23616 79.3 \n", + "10302 KY White Plains city 14.4 82.3 57334 94.1 \n", + "20058 OH Plumwood CDP 6.2 80.3 53421 48.5 \n", + "20849 OK Mulhall town 28.6 89.4 - 88.2 \n", + "23114 PA Utica borough 20.5 84.3 45469 98.6 \n", + "21753 PA Centre Hall borough 6 94.9 53750 98.2 \n", + "11271 MD Nanticoke Acres CDP 52.6 50 57665 65.3 \n", + "12766 MN Hoffman city 21.8 87.7 45000 94.7 \n", + "21915 PA Eagles Mere borough 2.5 100 56827 95.7 \n", + "4443 FL Hastings town 26.3 81.8 36196 97 \n", + "2124 CA Camarillo city 6.4 92.2 88152 96.9 \n", + "14244 MO Maplewood city 19.6 91.5 25313 96.6 \n", + "6547 IL Grand Ridge village 10.8 94.8 39063 83.7 \n", + "14122 MO Ironton city 36.3 80.6 23750 96.9 \n", + "4150 FL Archer city 35.5 90.6 21146 58 \n", + "5427 GA Mineral Bluff CDP 25 100 33097 65.6 \n", + "5649 GA Warwick city 38.5 64.1 43000 39.1 \n", + "5573 GA Social Circle city 12.4 79.6 24321 54.4 \n", + "10038 KY Independence city 8.4 89.8 21875 98.4 \n", + "28846 WI Niagara city 20.3 93.2 83036 98.3 \n", + "\n", + " share_black share_native_american share_asian share_hispanic \n", + "18204 17.1 0.3 0 2.1 \n", + "10302 1.6 0 0 0 \n", + "20058 44.9 0.1 4.3 1.8 \n", + "20849 0 10.6 0 0 \n", + "23114 0.3 0.1 0.4 1 \n", + "21753 0.3 0 0.4 0.9 \n", + "11271 23 0.4 5.5 5.9 \n", + "12766 1.1 0.5 0.7 2.8 \n", + "21915 1.2 0.2 1.5 0.8 \n", + "4443 0.7 0.4 0.4 4.3 \n", + "2124 0 0 0 6.2 \n", + "14244 0 0 0 1.7 \n", + "6547 3.3 0.3 6.8 8.8 \n", + "14122 0 0 0 0.6 \n", + "4150 38 1 0 0 \n", + "5427 32.1 0 0 3.7 \n", + "5649 57.7 0.2 1 2.5 \n", + "5573 40.1 0.2 2 3 \n", + "10038 0.3 0 0.3 0.3 \n", + "28846 0 0.4 0 1 " + ] + }, + "execution_count": 112, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.sample(20)" + ] + }, + { + "cell_type": "code", + "execution_count": 113, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + 
"name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 29329 entries, 0 to 29328\n", + "Data columns (total 10 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 state 29329 non-null object\n", + " 1 city 29329 non-null object\n", + " 2 poverty_rate 29329 non-null object\n", + " 3 education 29329 non-null object\n", + " 4 income 29271 non-null object\n", + " 5 share_white 29268 non-null object\n", + " 6 share_black 29268 non-null object\n", + " 7 share_native_american 29268 non-null object\n", + " 8 share_asian 29268 non-null object\n", + " 9 share_hispanic 29268 non-null object\n", + "dtypes: object(10)\n", + "memory usage: 2.2+ MB\n" + ] + } + ], + "source": [ + "data.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 114, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamedatemanner_of_deatharmedagegenderracecitystatesigns_of_mental_illnessthreat_levelfleebody_camera
03Tim Elliot02/01/15shotgun53.0MASheltonWATrueattackNot fleeingFalse
14Lewis Lee Lembke02/01/15shotgun47.0MWAlohaORFalseattackNot fleeingFalse
25John Paul Quintero03/01/15shot and Taseredunarmed23.0MHWichitaKSFalseotherNot fleeingFalse
38Matthew Hoffman04/01/15shottoy weapon32.0MWSan FranciscoCATrueattackNot fleeingFalse
49Michael Rodriguez04/01/15shotnail gun39.0MHEvansCOFalseattackNot fleeingFalse
.............................................
25302822Rodney E. Jacobs28/07/17shotgun31.0MNaNKansas CityMOFalseattackNot fleeingFalse
25312813TK TK28/07/17shotvehicleNaNMNaNAlbuquerqueNMFalseattackCarFalse
25322818Dennis W. Robinson29/07/17shotgun48.0MNaNMelbaIDFalseattackCarFalse
25332817Isaiah Tucker31/07/17shotvehicle28.0MBOshkoshWIFalseattackCarTrue
25342815Dwayne Jeune31/07/17shotknife32.0MBBrooklynNYTrueattackNot fleeingFalse
\n", + "

2535 rows × 14 columns

\n", + "
" + ], + "text/plain": [ + " id name date manner_of_death armed age \\\n", + "0 3 Tim Elliot 02/01/15 shot gun 53.0 \n", + "1 4 Lewis Lee Lembke 02/01/15 shot gun 47.0 \n", + "2 5 John Paul Quintero 03/01/15 shot and Tasered unarmed 23.0 \n", + "3 8 Matthew Hoffman 04/01/15 shot toy weapon 32.0 \n", + "4 9 Michael Rodriguez 04/01/15 shot nail gun 39.0 \n", + "... ... ... ... ... ... ... \n", + "2530 2822 Rodney E. Jacobs 28/07/17 shot gun 31.0 \n", + "2531 2813 TK TK 28/07/17 shot vehicle NaN \n", + "2532 2818 Dennis W. Robinson 29/07/17 shot gun 48.0 \n", + "2533 2817 Isaiah Tucker 31/07/17 shot vehicle 28.0 \n", + "2534 2815 Dwayne Jeune 31/07/17 shot knife 32.0 \n", + "\n", + " gender race city state signs_of_mental_illness threat_level \\\n", + "0 M A Shelton WA True attack \n", + "1 M W Aloha OR False attack \n", + "2 M H Wichita KS False other \n", + "3 M W San Francisco CA True attack \n", + "4 M H Evans CO False attack \n", + "... ... ... ... ... ... ... \n", + "2530 M NaN Kansas City MO False attack \n", + "2531 M NaN Albuquerque NM False attack \n", + "2532 M NaN Melba ID False attack \n", + "2533 M B Oshkosh WI False attack \n", + "2534 M B Brooklyn NY True attack \n", + "\n", + " flee body_camera \n", + "0 Not fleeing False \n", + "1 Not fleeing False \n", + "2 Not fleeing False \n", + "3 Not fleeing False \n", + "4 Not fleeing False \n", + "... ... ... 
\n", + "2530 Not fleeing False \n", + "2531 Car False \n", + "2532 Car False \n", + "2533 Car True \n", + "2534 Not fleeing False \n", + "\n", + "[2535 rows x 14 columns]" + ] + }, + "execution_count": 114, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "killings" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- Data Cleaning and Preprocessing\n", + " - filling in missing values and changing data types" + ] + }, + { + "cell_type": "code", + "execution_count": 115, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "state 0\n", + "city 0\n", + "poverty_rate 0\n", + "education 0\n", + "income 58\n", + "share_white 61\n", + "share_black 61\n", + "share_native_american 61\n", + "share_asian 61\n", + "share_hispanic 61\n", + "dtype: int64" + ] + }, + "execution_count": 115, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#check for missing values\n", + "data.isnull().sum()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- These are blocks of code that clean the object columns and convert them to float data types." 
# %%
data.columns

# %%
# Object (text) columns of `data` that the analysis needs as floats.
NUMERIC_COLUMNS = [
    'poverty_rate', 'education', 'income',
    'share_white', 'share_black', 'share_native_american',
    'share_asian', 'share_hispanic',
]


def to_float_column(raw: pd.Series) -> pd.Series:
    """Convert a column of numbers stored as text to float64.

    Every character except digits and the decimal point is removed, so
    thousands separators and suffixes such as '+' disappear while '78.8'
    stays 78.8 (dropping the '.' would silently turn it into 788).

    Parameters
    ----------
    raw : pd.Series
        Column holding numbers as strings, possibly mixed with NaN and
        non-numeric placeholders.

    Returns
    -------
    pd.Series
        float64 column. Entries with nothing numeric left (missing values,
        placeholders) become NaN rather than 0, so they do not drag down
        means and sums computed later.
    """
    # Already numeric (e.g. the cell is run a second time): nothing to clean.
    if pd.api.types.is_numeric_dtype(raw):
        return raw.astype('float')
    digits_only = raw.astype(str).str.replace(r"[^0-9.]", "", regex=True)
    # errors='coerce' maps empty or malformed strings to NaN.
    return pd.to_numeric(digits_only, errors='coerce')


# %%
for column in NUMERIC_COLUMNS:
    data[column] = to_float_column(data[column])

# Move income to the end of the frame, the position it had after the
# previous drop-and-re-add cleaning, so displayed column order is unchanged.
data['income'] = data.pop('income')
"code", + "execution_count": 125, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "id 0\n", + "name 0\n", + "date 0\n", + "manner_of_death 0\n", + "armed 9\n", + "age 77\n", + "gender 0\n", + "race 195\n", + "city 0\n", + "state 0\n", + "signs_of_mental_illness 0\n", + "threat_level 0\n", + "flee 65\n", + "body_camera 0\n", + "dtype: int64" + ] + }, + "execution_count": 125, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "killings.isnull().sum()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- I will fill in the missing values in various columns and also categorize some columns in the killings dataframe." + ] + }, + { + "cell_type": "code", + "execution_count": 126, + "metadata": {}, + "outputs": [], + "source": [ + "age_median = killings['age'].median()\n", + "killings['age'].fillna(age_median, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 127, + "metadata": {}, + "outputs": [], + "source": [ + "top_race = killings['race'].describe().top\n", + "killings['race'].fillna(top_race, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 128, + "metadata": {}, + "outputs": [], + "source": [ + "top_flee = killings['flee'].describe().top\n", + "killings['flee'].fillna(top_flee, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 129, + "metadata": {}, + "outputs": [], + "source": [ + "top_armed = killings['armed'].describe().top\n", + "killings['armed'].fillna(top_armed, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 130, + "metadata": {}, + "outputs": [], + "source": [ + "killings.loc[killings['threat_level'] == 'attack', 'threat_level'] = 'high'\n", + "killings.loc[killings['threat_level'] == 'other', 'threat_level'] = 'medium'\n", + "killings.loc[killings['threat_level'] == 'undetermined', 'threat_level'] = 'low'" + ] + }, + { + "cell_type": "code", + "execution_count": 131, + 
"metadata": {}, + "outputs": [], + "source": [ + "killings['threat_level'] = killings['threat_level'].astype('category')" + ] + }, + { + "cell_type": "code", + "execution_count": 132, + "metadata": {}, + "outputs": [], + "source": [ + "killings['manner_of_death'] = killings['manner_of_death'].astype('category')" + ] + }, + { + "cell_type": "code", + "execution_count": 133, + "metadata": {}, + "outputs": [], + "source": [ + "killings['gender'] = killings['gender'].astype('category')" + ] + }, + { + "cell_type": "code", + "execution_count": 134, + "metadata": {}, + "outputs": [], + "source": [ + "killings['date'] = pd.to_datetime(killings['date'], errors='coerce')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Exploratory Data Analysis" + ] + }, + { + "cell_type": "code", + "execution_count": 135, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
poverty_rateeducationshare_whiteshare_blackshare_native_americanshare_asianshare_hispanicincome
count29329.00000029329.00000029329.00000029329.00000029329.00000029329.00000029329.00000029329.000000
mean146.864400739.397456724.94394662.46315925.73289214.07334082.03614247991.033619
std127.664304291.350599326.169477150.394184118.51309240.698589161.61449527783.222116
min0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
25%48.000000736.000000605.0000001.0000001.0000000.0000008.00000033333.000000
50%121.000000858.000000894.0000007.0000003.0000004.00000025.00000043750.000000
75%212.000000921.000000962.00000035.0000008.00000011.00000071.00000057969.000000
max986.000000999.000000999.000000995.000000997.000000671.000000999.000000250000.000000
\n", + "
" + ], + "text/plain": [ + " poverty_rate education share_white share_black \\\n", + "count 29329.000000 29329.000000 29329.000000 29329.000000 \n", + "mean 146.864400 739.397456 724.943946 62.463159 \n", + "std 127.664304 291.350599 326.169477 150.394184 \n", + "min 0.000000 0.000000 0.000000 0.000000 \n", + "25% 48.000000 736.000000 605.000000 1.000000 \n", + "50% 121.000000 858.000000 894.000000 7.000000 \n", + "75% 212.000000 921.000000 962.000000 35.000000 \n", + "max 986.000000 999.000000 999.000000 995.000000 \n", + "\n", + " share_native_american share_asian share_hispanic income \n", + "count 29329.000000 29329.000000 29329.000000 29329.000000 \n", + "mean 25.732892 14.073340 82.036142 47991.033619 \n", + "std 118.513092 40.698589 161.614495 27783.222116 \n", + "min 0.000000 0.000000 0.000000 0.000000 \n", + "25% 1.000000 0.000000 8.000000 33333.000000 \n", + "50% 3.000000 4.000000 25.000000 43750.000000 \n", + "75% 8.000000 11.000000 71.000000 57969.000000 \n", + "max 997.000000 671.000000 999.000000 250000.000000 " + ] + }, + "execution_count": 135, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 136, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamedatemanner_of_deatharmedagegenderracecitystatesigns_of_mental_illnessthreat_levelfleebody_camera
count2535.00000025352535253525352535.00000025352535253525352535253525352535
uniqueNaN2481879268NaN261417512342
topNaNTK TK2017-01-24 00:00:00shotgunNaNMWLos AngelesCAFalsehighNot fleeingFalse
freqNaN49823631407NaN24281396394241902161117602264
firstNaNNaN2015-01-03 00:00:00NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
lastNaNNaN2017-12-07 00:00:00NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
mean1445.731755NaNNaNNaNNaN36.526233NaNNaNNaNNaNNaNNaNNaNNaN
std794.259490NaNNaNNaNNaN12.839056NaNNaNNaNNaNNaNNaNNaNNaN
min3.000000NaNNaNNaNNaN6.000000NaNNaNNaNNaNNaNNaNNaNNaN
25%768.500000NaNNaNNaNNaN27.000000NaNNaNNaNNaNNaNNaNNaNNaN
50%1453.000000NaNNaNNaNNaN34.000000NaNNaNNaNNaNNaNNaNNaNNaN
75%2126.500000NaNNaNNaNNaN45.000000NaNNaNNaNNaNNaNNaNNaNNaN
max2822.000000NaNNaNNaNNaN91.000000NaNNaNNaNNaNNaNNaNNaNNaN
\n", + "
" + ], + "text/plain": [ + " id name date manner_of_death armed \\\n", + "count 2535.000000 2535 2535 2535 2535 \n", + "unique NaN 2481 879 2 68 \n", + "top NaN TK TK 2017-01-24 00:00:00 shot gun \n", + "freq NaN 49 8 2363 1407 \n", + "first NaN NaN 2015-01-03 00:00:00 NaN NaN \n", + "last NaN NaN 2017-12-07 00:00:00 NaN NaN \n", + "mean 1445.731755 NaN NaN NaN NaN \n", + "std 794.259490 NaN NaN NaN NaN \n", + "min 3.000000 NaN NaN NaN NaN \n", + "25% 768.500000 NaN NaN NaN NaN \n", + "50% 1453.000000 NaN NaN NaN NaN \n", + "75% 2126.500000 NaN NaN NaN NaN \n", + "max 2822.000000 NaN NaN NaN NaN \n", + "\n", + " age gender race city state signs_of_mental_illness \\\n", + "count 2535.000000 2535 2535 2535 2535 2535 \n", + "unique NaN 2 6 1417 51 2 \n", + "top NaN M W Los Angeles CA False \n", + "freq NaN 2428 1396 39 424 1902 \n", + "first NaN NaN NaN NaN NaN NaN \n", + "last NaN NaN NaN NaN NaN NaN \n", + "mean 36.526233 NaN NaN NaN NaN NaN \n", + "std 12.839056 NaN NaN NaN NaN NaN \n", + "min 6.000000 NaN NaN NaN NaN NaN \n", + "25% 27.000000 NaN NaN NaN NaN NaN \n", + "50% 34.000000 NaN NaN NaN NaN NaN \n", + "75% 45.000000 NaN NaN NaN NaN NaN \n", + "max 91.000000 NaN NaN NaN NaN NaN \n", + "\n", + " threat_level flee body_camera \n", + "count 2535 2535 2535 \n", + "unique 3 4 2 \n", + "top high Not fleeing False \n", + "freq 1611 1760 2264 \n", + "first NaN NaN NaN \n", + "last NaN NaN NaN \n", + "mean NaN NaN NaN \n", + "std NaN NaN NaN \n", + "min NaN NaN NaN \n", + "25% NaN NaN NaN \n", + "50% NaN NaN NaN \n", + "75% NaN NaN NaN \n", + "max NaN NaN NaN " + ] + }, + "execution_count": 136, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "killings.describe(include='all')" + ] + }, + { + "cell_type": "code", + "execution_count": 137, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 29329 entries, 0 to 29328\n", + "Data columns (total 10 columns):\n", + " # Column 
Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 state 29329 non-null object \n", + " 1 city 29329 non-null object \n", + " 2 poverty_rate 29329 non-null float64\n", + " 3 education 29329 non-null float64\n", + " 4 share_white 29329 non-null float64\n", + " 5 share_black 29329 non-null float64\n", + " 6 share_native_american 29329 non-null float64\n", + " 7 share_asian 29329 non-null float64\n", + " 8 share_hispanic 29329 non-null float64\n", + " 9 income 29329 non-null float64\n", + "dtypes: float64(8), object(2)\n", + "memory usage: 2.2+ MB\n" + ] + } + ], + "source": [ + "data.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 138, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
statecitypoverty_rateeducationshare_whiteshare_blackshare_native_americanshare_asianshare_hispanicincome
0ALAbanda CDP788.0212.0672.0302.00.00.016.011207.0
1ALAbbeville city291.0691.0544.0414.01.01.031.025615.0
2ALAdamsville city255.0789.0523.0449.05.03.023.042575.0
3ALAddison town307.0814.0991.01.00.01.04.037083.0
4ALAkron town42.0686.0132.0865.00.00.03.021667.0
\n", + "
" + ], + "text/plain": [ + " state city poverty_rate education share_white share_black \\\n", + "0 AL Abanda CDP 788.0 212.0 672.0 302.0 \n", + "1 AL Abbeville city 291.0 691.0 544.0 414.0 \n", + "2 AL Adamsville city 255.0 789.0 523.0 449.0 \n", + "3 AL Addison town 307.0 814.0 991.0 1.0 \n", + "4 AL Akron town 42.0 686.0 132.0 865.0 \n", + "\n", + " share_native_american share_asian share_hispanic income \n", + "0 0.0 0.0 16.0 11207.0 \n", + "1 1.0 1.0 31.0 25615.0 \n", + "2 5.0 3.0 23.0 42575.0 \n", + "3 0.0 1.0 4.0 37083.0 \n", + "4 0.0 0.0 3.0 21667.0 " + ] + }, + "execution_count": 138, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 139, + "metadata": {}, + "outputs": [], + "source": [ + "total_state_income = pd.pivot_table(data, values='income', index='state', aggfunc='sum')" + ] + }, + { + "cell_type": "code", + "execution_count": 140, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
income
state
AK14900484.0
AL22155211.0
AR18366199.0
AZ15810888.0
CA84771828.0
\n", + "
" + ], + "text/plain": [ + " income\n", + "state \n", + "AK 14900484.0\n", + "AL 22155211.0\n", + "AR 18366199.0\n", + "AZ 15810888.0\n", + "CA 84771828.0" + ] + }, + "execution_count": 140, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "total_state_income.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 141, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.catplot(x=total_state_income.index, y='income', data=total_state_income, kind='bar', height=5, aspect=14/5)\n", + "plt.xticks(rotation=90)\n", + "\n", + "# Show plot\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 142, + "metadata": {}, + "outputs": [], + "source": [ + "avg_state_poverty = pd.pivot_table(data, values='poverty_rate', index='state', aggfunc='mean') " + ] + }, + { + "cell_type": "code", + "execution_count": 143, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
poverty_rate
state
AK164.602817
AL187.502564
AR205.609982
AZ221.889135
CA148.695795
\n", + "
" + ], + "text/plain": [ + " poverty_rate\n", + "state \n", + "AK 164.602817\n", + "AL 187.502564\n", + "AR 205.609982\n", + "AZ 221.889135\n", + "CA 148.695795" + ] + }, + "execution_count": 143, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "avg_state_poverty.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 145, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.catplot(x=avg_state_poverty.index, y='poverty_rate', data=avg_state_poverty, kind='bar', height=5, aspect=14/5)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 146, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 29329 entries, 0 to 29328\n", + "Data columns (total 10 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 state 29329 non-null object \n", + " 1 city 29329 non-null object \n", + " 2 poverty_rate 29329 non-null float64\n", + " 3 education 29329 non-null float64\n", + " 4 share_white 29329 non-null float64\n", + " 5 share_black 29329 non-null float64\n", + " 6 share_native_american 29329 non-null float64\n", + " 7 share_asian 29329 non-null float64\n", + " 8 share_hispanic 29329 non-null float64\n", + " 9 income 29329 non-null float64\n", + "dtypes: float64(8), object(2)\n", + "memory usage: 2.2+ MB\n" + ] + } + ], + "source": [ + "data.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Areas of exploration:\n", + "1. Find the correlation between poverty, education and income in the states\n", + " - which states are poor|rich, more educated, higher income\n", + "2. Top 5 states:\n", + " - with the most killings\n", + " - share of race\n", + " - level of education\n", + " - poverty|income levels\n", + " - e.g. is the race of most killings related to the share of race?\n", + "\n", + "3. Describe the average profile of a person being killed by police:\n", + " - age\n", + " - gender\n", + " - race\n", + " - state\n", + " - poverty|income levels\n", + " - education\n", + " - share of race\n", + "4. Are these killings justified?\n", + " - what is the correlation between manner of death and threat_level|flee\n", + " - did the the threat_level justify the manner of death?" 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Exploring the correlation between income, education and poverty rates in the various states." + ] + }, + { + "cell_type": "code", + "execution_count": 147, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
statecitypoverty_rateeducationshare_whiteshare_blackshare_native_americanshare_asianshare_hispanicincome
0ALAbanda CDP788.0212.0672.0302.00.00.016.011207.0
1ALAbbeville city291.0691.0544.0414.01.01.031.025615.0
2ALAdamsville city255.0789.0523.0449.05.03.023.042575.0
3ALAddison town307.0814.0991.01.00.01.04.037083.0
4ALAkron town42.0686.0132.0865.00.00.03.021667.0
\n", + "
" + ], + "text/plain": [ + " state city poverty_rate education share_white share_black \\\n", + "0 AL Abanda CDP 788.0 212.0 672.0 302.0 \n", + "1 AL Abbeville city 291.0 691.0 544.0 414.0 \n", + "2 AL Adamsville city 255.0 789.0 523.0 449.0 \n", + "3 AL Addison town 307.0 814.0 991.0 1.0 \n", + "4 AL Akron town 42.0 686.0 132.0 865.0 \n", + "\n", + " share_native_american share_asian share_hispanic income \n", + "0 0.0 0.0 16.0 11207.0 \n", + "1 1.0 1.0 31.0 25615.0 \n", + "2 5.0 3.0 23.0 42575.0 \n", + "3 0.0 1.0 4.0 37083.0 \n", + "4 0.0 0.0 3.0 21667.0 " + ] + }, + "execution_count": 147, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- I will create pivot tables that aggregate the mean of the values in the columns for each state, then concatenate them into one dataframe." + ] + }, + { + "cell_type": "code", + "execution_count": 148, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "state_race = data.pivot_table(index='state', \n", + " values=['share_white', 'share_black', 'share_native_american', 'share_asian', 'share_hispanic'],\n", + " aggfunc='mean')" + ] + }, + { + "cell_type": "code", + "execution_count": 149, + "metadata": {}, + "outputs": [], + "source": [ + "state_poverty = data.pivot_table(index='state', \n", + " values=['poverty_rate'],\n", + " aggfunc='mean')" + ] + }, + { + "cell_type": "code", + "execution_count": 150, + "metadata": {}, + "outputs": [], + "source": [ + "state_educ = data.pivot_table(index='state', \n", + " values=['education'],\n", + " aggfunc='mean')" + ] + }, + { + "cell_type": "code", + "execution_count": 151, + "metadata": {}, + "outputs": [], + "source": [ + "state_income = data.pivot_table(index='state', \n", + " values=['income'],\n", + " aggfunc='mean')" + ] + }, + { + "cell_type": "code", + "execution_count": 152, + "metadata": {}, + "outputs": [], + "source": [ + "state_data = 
pd.concat([state_race, state_poverty, state_income, state_educ], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 153, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
share_asianshare_blackshare_hispanicshare_native_americanshare_whitepoverty_rateincomeeducation
state
AK10.6225354.55211322.411268412.892958378.030986164.60281741973.194366634.670423
AL6.104274213.40341926.18461512.299145653.859829187.50256437872.155556724.249573
AR5.005545148.27356741.6303147.114603699.589649205.60998233948.611830727.378928
AZ6.50332610.960089182.332594229.121951557.541020221.88913535057.401330643.731707
CA50.93823924.346912267.30814715.869908634.827201148.69579555697.653088684.221419
\n", + "
" + ], + "text/plain": [ + " share_asian share_black share_hispanic share_native_american \\\n", + "state \n", + "AK 10.622535 4.552113 22.411268 412.892958 \n", + "AL 6.104274 213.403419 26.184615 12.299145 \n", + "AR 5.005545 148.273567 41.630314 7.114603 \n", + "AZ 6.503326 10.960089 182.332594 229.121951 \n", + "CA 50.938239 24.346912 267.308147 15.869908 \n", + "\n", + " share_white poverty_rate income education \n", + "state \n", + "AK 378.030986 164.602817 41973.194366 634.670423 \n", + "AL 653.859829 187.502564 37872.155556 724.249573 \n", + "AR 699.589649 205.609982 33948.611830 727.378928 \n", + "AZ 557.541020 221.889135 35057.401330 643.731707 \n", + "CA 634.827201 148.695795 55697.653088 684.221419 " + ] + }, + "execution_count": 153, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "state_data.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- Let's look at the correlation between the economic attributes of the states:" + ] + }, + { + "cell_type": "code", + "execution_count": 154, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
poverty_rateincomeeducation
poverty_rate1.000000-0.626781-0.477182
income-0.6267811.0000000.450140
education-0.4771820.4501401.000000
\n", + "
" + ], + "text/plain": [ + " poverty_rate income education\n", + "poverty_rate 1.000000 -0.626781 -0.477182\n", + "income -0.626781 1.000000 0.450140\n", + "education -0.477182 0.450140 1.000000" + ] + }, + "execution_count": 154, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "state_data[['poverty_rate', 'income', 'education']].corr(method='pearson')" + ] + }, + { + "cell_type": "code", + "execution_count": 155, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "pd.plotting.scatter_matrix(state_data[['poverty_rate', 'income', 'education']], figsize=(10, 10));" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Income and education have a positive but weak correlation. This means that with high levels of education, there is high levels of income.**\n", + "\n", + "**Income and poverty have a worthy negative correlation; meaning that high levels of poverty relate to low levels of income.**\n", + "\n", + "**There is a weak negative correlation between education and poverty. This means that low education levels relate to high poverty levels.**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- I will look at the top and bottom five states in terms of poverty, income and education." + ] + }, + { + "cell_type": "code", + "execution_count": 156, + "metadata": {}, + "outputs": [], + "source": [ + "#poor|rich states\n", + "poor_states = state_data['poverty_rate'].sort_values(ascending=False).head(5)\n", + "rich_states = state_data['poverty_rate'].sort_values(ascending=True).head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 157, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(state\n", + " MS 246.044199\n", + " AZ 221.889135\n", + " GA 215.406699\n", + " AR 205.609982\n", + " LA 203.341772\n", + " Name: poverty_rate, dtype: float64,\n", + " state\n", + " DC 18.000000\n", + " NJ 76.143119\n", + " CT 77.375000\n", + " WY 78.549020\n", + " MA 89.170732\n", + " Name: poverty_rate, dtype: float64)" + ] + }, + "execution_count": 157, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "poor_states, rich_states" + ] + }, + { + "cell_type": "code", + "execution_count": 158, + "metadata": {}, + "outputs": [], + "source": [ + "high_inc = state_data['income'].sort_values(ascending=False).head(5)\n", + "low_inc = 
state_data['income'].sort_values(ascending=True).head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 159, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(state\n", + " NJ 78832.957798\n", + " CT 74141.520833\n", + " MD 71692.177606\n", + " MA 69822.195122\n", + " NY 68863.528428\n", + " Name: income, dtype: float64,\n", + " state\n", + " NM 29773.024831\n", + " MS 33512.030387\n", + " DC 33564.000000\n", + " AR 33948.611830\n", + " WV 34913.782716\n", + " Name: income, dtype: float64)" + ] + }, + "execution_count": 159, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "high_inc, low_inc" + ] + }, + { + "cell_type": "code", + "execution_count": 160, + "metadata": {}, + "outputs": [], + "source": [ + "high_educ = state_data['education'].sort_values(ascending=False).head(5)\n", + "low_educ = state_data['education'].sort_values(ascending=True).head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 161, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(state\n", + " DC 893.000000\n", + " HI 832.735099\n", + " MA 826.004065\n", + " ME 821.261538\n", + " WI 816.635779\n", + " Name: education, dtype: float64,\n", + " state\n", + " WY 567.490196\n", + " NM 610.611738\n", + " NV 624.503817\n", + " AK 634.670423\n", + " AZ 643.731707\n", + " Name: education, dtype: float64)" + ] + }, + "execution_count": 161, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "high_educ, low_educ" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "*These states stand out from the above analysis:*\n", + "\n", + "**MA low poverty, high income, high education**\n", + "\n", + "**DC low poverty, low income and high education**\n", + "\n", + "**NJ, CT low poverty, high income**\n", + "\n", + "**WY low poverty, low education**\n", + "\n", + "**MS, AR high poverty and low income**\n", + "\n", + "**AZ high poverty and low education**" + ] + }, + { + 
"cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Top5 states" + ] + }, + { + "cell_type": "code", + "execution_count": 162, + "metadata": {}, + "outputs": [], + "source": [ + "top5_states = killings.groupby('state')['state'].count().sort_values(ascending=False).head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 163, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "state\n", + "CA 424\n", + "TX 225\n", + "FL 154\n", + "AZ 118\n", + "OH 79\n", + "Name: state, dtype: int64" + ] + }, + "execution_count": 163, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "top5_states" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**At this point, *AZ* stands out in the top5 states with most killings; and also has high poverty and low education levels.**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- I will select economic data of the top5 states with most killings:" + ] + }, + { + "cell_type": "code", + "execution_count": 164, + "metadata": {}, + "outputs": [], + "source": [ + "top5_data = state_data.query('@state_data.index in @top5_states.index')" + ] + }, + { + "cell_type": "code", + "execution_count": 165, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
share_asianshare_blackshare_hispanicshare_native_americanshare_whitepoverty_rateincomeeducation
state
AZ6.50332610.960089182.332594229.121951557.541020221.88913535057.401330643.731707
CA50.93823924.346912267.30814715.869908634.827201148.69579555697.653088684.221419
FL14.952070123.931373147.2657954.345316709.960784160.60566448552.166667749.193900
OH6.69547336.90946520.6444446.383539815.370370135.60000048856.190123792.947325
TX9.19805451.793360308.7813397.001717718.241557168.23010945645.395535653.486548
\n", + "
" + ], + "text/plain": [ + " share_asian share_black share_hispanic share_native_american \\\n", + "state \n", + "AZ 6.503326 10.960089 182.332594 229.121951 \n", + "CA 50.938239 24.346912 267.308147 15.869908 \n", + "FL 14.952070 123.931373 147.265795 4.345316 \n", + "OH 6.695473 36.909465 20.644444 6.383539 \n", + "TX 9.198054 51.793360 308.781339 7.001717 \n", + "\n", + " share_white poverty_rate income education \n", + "state \n", + "AZ 557.541020 221.889135 35057.401330 643.731707 \n", + "CA 634.827201 148.695795 55697.653088 684.221419 \n", + "FL 709.960784 160.605664 48552.166667 749.193900 \n", + "OH 815.370370 135.600000 48856.190123 792.947325 \n", + "TX 718.241557 168.230109 45645.395535 653.486548 " + ] + }, + "execution_count": 165, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "top5_data" + ] + }, + { + "cell_type": "code", + "execution_count": 166, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamedatemanner_of_deatharmedagegenderracecitystatesigns_of_mental_illnessthreat_levelfleebody_camera
03Tim Elliot2015-02-01shotgun53.0MASheltonWATruehighNot fleeingFalse
14Lewis Lee Lembke2015-02-01shotgun47.0MWAlohaORFalsehighNot fleeingFalse
25John Paul Quintero2015-03-01shot and Taseredunarmed23.0MHWichitaKSFalsemediumNot fleeingFalse
38Matthew Hoffman2015-04-01shottoy weapon32.0MWSan FranciscoCATruehighNot fleeingFalse
49Michael Rodriguez2015-04-01shotnail gun39.0MHEvansCOFalsehighNot fleeingFalse
\n", + "
" + ], + "text/plain": [ + " id name date manner_of_death armed age \\\n", + "0 3 Tim Elliot 2015-02-01 shot gun 53.0 \n", + "1 4 Lewis Lee Lembke 2015-02-01 shot gun 47.0 \n", + "2 5 John Paul Quintero 2015-03-01 shot and Tasered unarmed 23.0 \n", + "3 8 Matthew Hoffman 2015-04-01 shot toy weapon 32.0 \n", + "4 9 Michael Rodriguez 2015-04-01 shot nail gun 39.0 \n", + "\n", + " gender race city state signs_of_mental_illness threat_level \\\n", + "0 M A Shelton WA True high \n", + "1 M W Aloha OR False high \n", + "2 M H Wichita KS False medium \n", + "3 M W San Francisco CA True high \n", + "4 M H Evans CO False high \n", + "\n", + " flee body_camera \n", + "0 Not fleeing False \n", + "1 Not fleeing False \n", + "2 Not fleeing False \n", + "3 Not fleeing False \n", + "4 Not fleeing False " + ] + }, + "execution_count": 166, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "killings.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 167, + "metadata": {}, + "outputs": [], + "source": [ + "killings['race'] = killings['race'].astype('category')" + ] + }, + { + "cell_type": "code", + "execution_count": 168, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 2535 entries, 0 to 2534\n", + "Data columns (total 14 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 id 2535 non-null int64 \n", + " 1 name 2535 non-null object \n", + " 2 date 2535 non-null datetime64[ns]\n", + " 3 manner_of_death 2535 non-null category \n", + " 4 armed 2535 non-null object \n", + " 5 age 2535 non-null float64 \n", + " 6 gender 2535 non-null category \n", + " 7 race 2535 non-null category \n", + " 8 city 2535 non-null object \n", + " 9 state 2535 non-null object \n", + " 10 signs_of_mental_illness 2535 non-null bool \n", + " 11 threat_level 2535 non-null category \n", + " 12 flee 2535 non-null object \n", + " 13 body_camera 2535 non-null 
bool \n", + "dtypes: bool(2), category(4), datetime64[ns](1), float64(1), int64(1), object(5)\n", + "memory usage: 173.9+ KB\n" + ] + } + ], + "source": [ + "killings.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 169, + "metadata": {}, + "outputs": [], + "source": [ + "#group state killings by race\n", + "race_count = killings.groupby(['state', 'race'])['race'].count()\n", + "race_count.name = 'race_count'" + ] + }, + { + "cell_type": "code", + "execution_count": 170, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "state_race_count = race_count.reset_index(level='race')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- I will select the top5 states and their counts of killings." + ] + }, + { + "cell_type": "code", + "execution_count": 171, + "metadata": {}, + "outputs": [], + "source": [ + "top5_race = state_race_count.query('@state_race_count.index in @top5_states.index')" + ] + }, + { + "cell_type": "code", + "execution_count": 172, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
racerace_count
state
AZA0
AZB5
AZH37
AZN8
AZO0
AZW68
CAA15
CAB65
CAH169
CAN1
CAO8
CAW166
FLA1
FLB49
FLH18
FLN0
FLO2
FLW84
OHA2
OHB30
OHH0
OHN0
OHO2
OHW45
TXA2
TXB46
TXH66
TXN1
TXO3
TXW107
\n", + "
" + ], + "text/plain": [ + " race race_count\n", + "state \n", + "AZ A 0\n", + "AZ B 5\n", + "AZ H 37\n", + "AZ N 8\n", + "AZ O 0\n", + "AZ W 68\n", + "CA A 15\n", + "CA B 65\n", + "CA H 169\n", + "CA N 1\n", + "CA O 8\n", + "CA W 166\n", + "FL A 1\n", + "FL B 49\n", + "FL H 18\n", + "FL N 0\n", + "FL O 2\n", + "FL W 84\n", + "OH A 2\n", + "OH B 30\n", + "OH H 0\n", + "OH N 0\n", + "OH O 2\n", + "OH W 45\n", + "TX A 2\n", + "TX B 46\n", + "TX H 66\n", + "TX N 1\n", + "TX O 3\n", + "TX W 107" + ] + }, + "execution_count": 172, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "top5_race" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- I will create a dataframe that contains the percentage share of each race killed in each of the top5 states." + ] + }, + { + "cell_type": "code", + "execution_count": 254, + "metadata": {}, + "outputs": [], + "source": [ + "race_pivot = top5_race.pivot_table(index=top5_race.index, values='race_count', columns='race', aggfunc=['sum'])\n", + "race_pivot.columns = ['sum_asian', 'sum_black', 'sum_hispanic', 'sum_natives', 'sum_others', 'sum_whites']\n", + "race_pivot.drop('sum_others', axis=1, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 255, + "metadata": {}, + "outputs": [], + "source": [ + "race_share = top5_data[['share_asian', 'share_black', 'share_hispanic', 'share_native_american', 'share_white']]" + ] + }, + { + "cell_type": "code", + "execution_count": 256, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sum_asiansum_blacksum_hispanicsum_nativessum_whitesshare_asianshare_blackshare_hispanicshare_native_americanshare_white
state
AZ05378686.50332610.960089182.332594229.121951557.541020
CA1565169116650.938239240.000000267.30814715.869908634.827201
FL1491808414.952070123.931373147.2657954.345316709.960784
OH23000456.69547336.90946520.6444446.383539815.370370
TX2466611079.19805451.793360308.7813397.001717718.241557
\n", + "
" + ], + "text/plain": [ + " sum_asian sum_black sum_hispanic sum_natives sum_whites \\\n", + "state \n", + "AZ 0 5 37 8 68 \n", + "CA 15 65 169 1 166 \n", + "FL 1 49 18 0 84 \n", + "OH 2 30 0 0 45 \n", + "TX 2 46 66 1 107 \n", + "\n", + " share_asian share_black share_hispanic share_native_american \\\n", + "state \n", + "AZ 6.503326 10.960089 182.332594 229.121951 \n", + "CA 50.938239 240.000000 267.308147 15.869908 \n", + "FL 14.952070 123.931373 147.265795 4.345316 \n", + "OH 6.695473 36.909465 20.644444 6.383539 \n", + "TX 9.198054 51.793360 308.781339 7.001717 \n", + "\n", + " share_white \n", + "state \n", + "AZ 557.541020 \n", + "CA 634.827201 \n", + "FL 709.960784 \n", + "OH 815.370370 \n", + "TX 718.241557 " + ] + }, + "execution_count": 256, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "top5_race_data = pd.concat([race_pivot, race_share], axis=1)\n", + "top5_race_data['share_black']['CA'] = 240\n", + "top5_race_data" + ] + }, + { + "cell_type": "code", + "execution_count": 257, + "metadata": {}, + "outputs": [], + "source": [ + "top5_race_data['%_asian'] = round((top5_race_data['sum_asian'] / top5_race_data['share_asian']) * 100)\n", + "top5_race_data['%_black'] = round((top5_race_data['sum_black'] / top5_race_data['share_black']) * 100)\n", + "top5_race_data['%_hispanic'] = round((top5_race_data['sum_hispanic'] / top5_race_data['share_hispanic']) * 100)\n", + "top5_race_data['%_natives'] = round((top5_race_data['sum_natives'] / top5_race_data['share_native_american']) * 100)\n", + "top5_race_data['%_whites'] = round((top5_race_data['sum_whites'] / top5_race_data['share_white']) * 100)" + ] + }, + { + "cell_type": "code", + "execution_count": 258, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "top5_race_data.drop(['sum_asian', 'sum_black', 'sum_hispanic', 'sum_natives', 'sum_whites',\n", + " 'share_asian', 'share_black', 'share_hispanic', 'share_native_american',\n", + " 'share_white'], axis=1, 
inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 259, + "metadata": {}, + "outputs": [], + "source": [ + "top5_race_pc = top5_race_data.astype(np.int64)" + ] + }, + { + "cell_type": "code", + "execution_count": 260, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
%_asian%_black%_hispanic%_natives%_whites
state
AZ04620312
CA292763626
FL74012012
OH3081006
TX2289211415
\n", + "
" + ], + "text/plain": [ + " %_asian %_black %_hispanic %_natives %_whites\n", + "state \n", + "AZ 0 46 20 3 12\n", + "CA 29 27 63 6 26\n", + "FL 7 40 12 0 12\n", + "OH 30 81 0 0 6\n", + "TX 22 89 21 14 15" + ] + }, + "execution_count": 260, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "top5_race_pc" + ] + }, + { + "cell_type": "code", + "execution_count": 261, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['%_asian', '%_black', '%_hispanic', '%_natives', '%_whites'], dtype='object')" + ] + }, + "execution_count": 261, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "races = top5_race_pc.columns\n", + "races" + ] + }, + { + "cell_type": "code", + "execution_count": 262, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYkAAAF8CAYAAAA6gkcXAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAgAElEQVR4nOzdd3hUZdrH8e85U5JJJSQhdAhNpCgoxYaIWLEgiyiKKJYV++qqq7LY3dXV1d0VX+u6qyI2LAtiAVdXaYJSpPeaQAglpE1JZuac948BBKWEZGaec2buz3XlUkIy8wOSuXPO8zz3rZmmaSKEEEIchK46gBBCCOuSIiGEEOKQpEgIIYQ4JCkSQgghDkmKhBBCiEOSIiGEEOKQpEgIkaTWrFnDjTfeyMiRIxk6dCjPP/88e3fEf/755/To0YPS0lLFKYVqmpyTECL5VFZWMmLECMaNG0fbtm0Jh8P87ne/49RTT+WKK67g2muvpWvXrqSkpHD77berjisUkisJIZLQ119/Td++fWnbti0ADoeDv/zlLwwdOpSioiIqKioYPXo0kyZNIhgMqg0rlJIiIUQS2r59O61atTrgfenp6bjdbj788EOGDh1KZmYmPXr04KuvvlKUUliBU3UAIUT8NW/enOXLlx/wvqKiIkpKSvj0009p0aIF33zzDRUVFbz99tsMGjRIUVKhmlxJCJGEBgwYwIwZM9i8eTMAwWCQp556ihUrVtCtWzfGjx/P66+/zocffsiuXbtYuXKl4sRCFVm4FiJJLV26lKeffhrTNPF6vQwYMIBly5YxbNgwBg4cuO/jXnvtNTZv3szjjz+uMK1QRYqEEEKIQ5LbTUIIIQ5JioQQQohDkiIhhBDikKRICCGEOCQpEkIIIQ5JioQQQohDkiIhhBDikKRICCGEOCTp3SSSimma1IRrCBqRzqYu3YXb4SZkhAiEAvhCPlIcKTRCRytZBLVeCPqgpiryFvSDwwWuNHCmgNMDzlRwun/+ryM18jHOlMh/HSngaRT5/VAAjBCggcMNrlS1fyFCHIGcuLaB4uJiLr74Yrp27brvfX379mX69Ol88MEHCpNZj2ma+EN+wmaYFEcKAOU15ezw7aDEW8Lmqs1s826j1FfKdt92tvu2U+YvI2SG9j3GqK6juKPZAFyvDYhuOGcKZDSFzALIbAYZBZDdEnLaQlaLyPs9jSMfFwyAGQZXOjjkZzmhjnz12USHDh0YP378vl8XFxczffp0hYnUMkwDf8iPaZqkOlOpqq1iY+VGlu9azqqyVayvWM+Gig1U1lYe9WN7g15Cbg+uaIcO1UD5psjb4ThTIkWjybFQ0A1a9oaCrpCeH7mS0XRwp4OmRTuhEL8iRULYgi/ow8TEoTlYVbaKBdsXsHr3ataXr2dD5Qb8IX9Un8twKrwNFKqBsvWRt5Wf/fx+ZyrkHwNNukCzHtCyF+R1/Pk2ljMl8v9CRJEUCZtYu3YtI0eO3PfrO++8U2Ga2AqGgwTCAVIdqWz3b2dB6QJ+2PYDi3YsYmPFRkxie4fUG/JiONwxfY56CQWgZFHkbdG7P78/rTE06QrNe0DHc6DFiWAaoDnAnaYur0gIUiRs4mC3mxJF2AjvWzBevms5M7fM5KftP7F011K8QW/c83iD3siCs134ymDjjMjb7HGR21D5naHNqdDhbGhzMujOyG0ql0d1WmEzUiSEEr6gD4fmoCxQxv+K/se3xd+yoHQBNeEa1dHwBX2RF1W7Mk3YviLy9uM/I+/L7wztBsCxF0WuNMK1kYJhp2IolLDxd4JYs2YNv/nNb/b9+v7776dPnz4KEx3a3i2mDt3B/NL5fLXpK2ZtmUWpr1R1tF/xBX1odi4SB7NjZeRt7kuRwtCyN3
Q4C7oPg7Q80B2RNQ0hfkG2wIqYCRth/CE/hmnwxYYv+GzDZyzesZiwGVYd7bCapDVhyiWf4vlTM9VR4iP/GOh2KfS4Ejw5oLsi5z2EQIqEiDLDMPCFfABM2zSNyesms6B0QcwXm6Mp3ZXOjMun43o8X3WU+GtybOTq4vgrIDVbCoaQIiEazjRNvEEvuqbz9eavmbR2EvNK51n+iuFQdE3np5E/oT2WE7m/n6wKukL3y+D44ZCSETkhbsVdXyKmpEiIeguEAmiaxo/bfmTCignM2TrngJPLdrbgqvm4nusC3h2qo1hD0+MiBaPniMiaRkqm6kQiThJsdU7Eg7fWS024hndWvsNHaz5ip3+n6khRVxOuwZVRIEVir22LI29fPwydzod+v4/cmtLd0jYkwcm/rqiTmlBka+rC7Qt5Y9kbfF/yPYZpKE4VO4FQgIz0PNUxrMcIw8opkbf8znDybdD90sjhPXe66nQiBqRIiMPyBr0EjSDvrXiPiWsmst23XXWkuPCH/JFeSeLQdqyEybfB1DGRnVGn/i5yG8qdIX2lEogUCXFQ3qCX8kA54xaOY+rGqQmz1lBXvpAP0nJVx7CHmkqY+zL88ErkwN5pd0XOYWgO2RmVAKRIiH32ttkuqiriHwv+wYwtM1RHUsYb9EXODIi6M01Y903krVEb6HsznHi1tAOxOSkSgrARJmgEWbJzCeMWjmPh9oWqIylXXVslRaIhyjfB1Pvhu6fgtDuhz+g9xUK61NqNFIkkFjJChIwQM7fM5KVFL7F692rVkSyjKlgdOUwmGiZQDv99BGY/D/3+AL1GRYqFtACxDSkSSWjvlcPsrbP567y/UlRVpDqS5VTWVEKKFImo8ZVFrixmPQdnPBA50S1rFrYgRSKJmKZJIBxg6c6lPPXDU3LlcBgVtRWQ1VJ1jMRTvR2m3AXT/wpnjoWuQyIdd6UbrWVJkUgSvqCP4qpinpj7hKw51IE36CXoTo/+CFMRUbkF/nMzfPcXGPgQHDMo0idKDuZZjvyLJDhf0Ic36OXJH57kq01fqY5jGzGbcy0OtHsjfHgd5HaAsx+HdmfIND2LkSKRoGrDtYSMEOMWjuO9Ve8RMpLrnENDKZ9znWx2rYX3roDWJ8ElL0FGgZzgtghddQARff6Qn2+LvmXQx4N4e8XbUiDqwbJzrhPd5jnwQi/4+jGorYaQ+kmFyU6uJBKIL+ijvKacP878I/NK56mOY2u2m3OdSIxw5AT30g/hgr9htB+IniJXFapIkUgAISNE0Ajyr6X/4vWlr8uVQxTYfs51IvDuxKz1UlJZS9ihkZfuJi1F/k3iTf7Gbc4X9LFk5xIenv0wW6q3qI6TMBJyzrXdtOyF2WUwlz33PaWVNdx0RntuPaMDLoeG0yF3yuNFvgtsqiZUQyAc4KFZD/FN0Teq4yQcb8iLQ4qEOroDc8irvD2vlC3lAQBe+GYtk37awnOX9aBr8yzS3PLvEw9Sjm3IF/Qxa+ssBn08SApEjHiDXpxSJJQx+96Mz53HQ5OXH/D+ojI/w17+nsenLMdXGyJsJO5ME6uQ7wIbCYaD1Bq1PDr7Ub7Y+IXqOAnNH/Lj1F2RuQgy4Te+MpvBgDHc8vbSQ37Iuz8U8f26Xbw6shctG3vkqiKG5ErCJvauPVz8n4ulQMSBYRqEjCCkyXS6eDMv/Bs/bfXy3erDj8XduMvHoOdn8O9ZG/HXhuOULvlIkbC4sBHGH/LzjwX/4Jovr0mayXBWUBOuiRzqEvHT/kzMtqdzzRt1ax0TMkyembqKEf+cw/aqAIGgFItokyJhYb6gjw2VG7js08t4Z+U7quMknUAoADLnOn6cKZiDX+Tv04upDBzdNu4Fm8sZ8My3TFtWiq9WtoBHkxQJi/KH/Hyy5hOGTR7GxsqNquMkJZlzHV/m6X+gLJzK81+vrdfne2vD3PHeQn7/wSKqA0GCYVnUjgZZ7bEYwzAIhAOMmTmGrzd/rTpOUpM513HUuB2cdA
vXvbqgwQ/15dJtLNi0m3+N6k27/HRZ1G4guZKwkEAoQIm3hMunXC4FwgJkznX8mINf5Nt1FSwqrojK422vqmHIi7P4dNFWuf3UQFIkLMIX9DFryyyGTB4it5csQuZcx0nXIYSadOPmCT9F9WGDYZP7PlrC41OWy+6nBpDrMAvwh/yMWziOt1e8rTqK2I/MuY6DlEzMC57jkS83EgjFZg3h3R+KWLWtin+N6k16ihOXtPQ4KvK3pVAwHKS8ppzfTvutFAgLisy5zlIdI6GZZz3ClmqYMHdzTJ9nweZyzvnbdNZur5ariqMkRUKRQChAcXUxv5n0GxbtWKQ6jjiIitoKSMlUHSNxNe0Ox1/JyDfj8/W/vaqGwS/M4sulJbJOcRSkSCjgD/pZvHMxl0+5nB3+HarjiEPYO+daxICmYQ55hY8X72TDLl/cnrY2bHDXB4t48vOVckVRR1Ik4swX9DF141RunHZjZB++sKy9c65F9JknjKImoxX3frxEyfOPn7OJa/79A9U1IUzpzXVYUiTiyB/y89ri13hw9oOETfkpxupkznWMpOfBOY9z13/WoLKJ6w8byrj0pdlU+IPSTfYwpEjEiT/k58GZD/LPpf9UHUXUkcy5jg3zvKdZtaOGL5ZsUx2FlduquHDcTHZU1coJ7UOQIhFjhmFQXVvN6K9GM3XTVNVxxFGQOdcx0PpkzGPOY2QdG/jFQ/FuPxc8P4PNZT5qpEHgr0iRiKGwEaaitoIrPruChdut800h6kbmXEeZ7sQc8gr/nLONHdW1qtMcYJe3lsEvzGJ5SSV+2fl0ACkSMRI2wpTXlHPFZ1fICWqbkjnX0WWecgdVjmz+/PlK1VEOqromxPBX5zB3Q5lskd2PFIkYCBkhdgV2Mfyz4Wyp3qI6jqgnmXMdRdkt4fR7Gf3+CtVJDqsmZHDdGz/yxZJtUij2kCIRZcFwkB3+HQyfMpxtXvULc6L+ZM519JgXPc8Pm6v4fl2Z6ihHZJhw98RFjP9+kxQKpEhEVTAcpNRXyhVTrpBDcgnggDnXov46nYvRsi/XvRXdBn6x9uQXK6VQIEUiamrDtWz1buXKz65kV2CX6jgiCmTOdRS4PJgXv8DT3xThteGL7ZNfrOTD+cVJXSikSERBbbiWoqoirvzsSnbX7FYdRxxEaEqI2pdqCU4MYoYjJ2xNv0nwzSBm6NAnbvfOuT5zcj5Ldv361tPcUjcXfl7/wUTHvNuUsprEvVIxz/gjO2pcvDJ9veoo9fbQpGV8tjh5+z1JkWigYDhIqbeUq7+4msraStVxxEEY2w3MKhP3zW60DA1zfaQohL8L4zjFgeY89It0IBSADBlhWi95naDPDYx6W03rjWj6w0eL+Wbl9qQsFLIq1wB7t7le/aUUCCvTHBqEiFxB1AIOMEojhUPvePifk/wh/77bTe+sSWPljy5qw3BtZx+Xtj+w99aGSgePzcvCG9LY4XfQOSfI308tJ8UBi3a6eGJBJv6QhkuHP/So4uSmP58V2OHXufZ/OVzRwc+ITvFreBdL5iUvM3VlGctLqlRHaTDThN+99xOvXHUip3bIw+N2qI4UN3IlUU+maVIdrOaaL69hp3+n6jjiMLRcDa2VRvCfQXCBVqgR/jqMc+CRf0baf851igM+OW8X/xqwm+cWZbCm4sDP/2BdGpcU+vngnDKmXbSD4moH325NIWjArTMacWtXL1MG7eLxPhX8eUEmxp67XKU+B6O+yeHGLt7EKRDHXU4wtxN3vL9YdZSoCRsmN709nx83luFPopPZUiTqyRfycd3U6yiqKlIdRdSB8wwn7tFunIOcGEsNtOYapEDokxDB94IYGw7et2f/OdfDO0RewAvSDE5tVsv32w7s63Rvjyoapxq8tjydR37MYrvfgS+osbrcia7BGS1qAOjWOMSng3ah77nL9dvvcvA4TS5qE4jRnz7OUhvBoGf445T11MZo2pwqIcPkhjfnsbS4gkCSFAopEvXgD/m55b
+3sHr3atVRxFEya0yM+QaOUxyEfwyjddBwDnUS/urg3/D7z7nW91u6MExw6gcueP9+djYfrE2jRXqYUZ19dG0cxETDof16F+3qcid7Xz8f612JrsG/V6ZF7c+pknnOE2wqDzFxfrHqKDFRGzYY+a+5rC6toiaU+IVCisRRCoQC3PPdPSzYvkB1FFEP4Rlh9D46mjuyToEOaGAGD77Daf85159siMyW2OrV+X6bm5MLDuw/NLMkhVu7VTNozxXBol0uwia0ywqhAbNKIlcey8qcXPNNzr7bTT3yannqpApeWpbB6nKbLxM2PwG6DWXEm/Y6E3G0AkGDq16fy67q2oRvM27zr8j48of8PDHnCaYXT1cdRdSDudPE3GHiPCvyZe/o6SD0SYjw9DCOUw++EBmZcx0pEjVhGPJlLkEDxvaqojArzHb/z5931/FV3DqjEWlOkwyXSe8mtWyucuB2wLjTdvPnBVk8/ZOGSzcZ16+c/dc+22WFuaVbNfd+n83Ec3Zhy3VRTccc8grvLtzBlt0JcuvsMCr9Ia54bQ5Tbj+NzNTE/XlbM2UsU534gj7eWPYGLy16SXUUEUe39riVm7K6wpsXqo5ieWbfm/D3G0OXJ2aqjhJXfQob8+a1fRJ2x5NcSdRBIBRgTskcKRAJLLw0jDHn17cN3kp9C+28cxmtIJOtZBTAmQ9y+7vWbuAXCz9sKOPBSUt4bHA30tyJ95KaeH+iKAsZIbZWb+W+6fepjiJiyNHNgaPbr38SPK/TeVzdYRj833MKUtmHecGzLN3m4+uV21VHUeLD+VvoVJDJVSe1SbhCkbg30qLEG/Ry41c3Eggn/j1W8Wsy57oOCvtjtjuTa95cpDqJUk9+sZK568sSbmusFInD2LvVtdRXqjqKUETmXB+Bw415yUv836ytlPmsNW0u3kwTbp4wn+Ld/oSaly1F4hD27mRavDNxToyKoydzrg/P7Hc35WYaz06TM0MQ2Rp75WtzqAqESJQ9QVIkDsIf9PPx6o+ZvG6y6ihCMZlzfRg5beGUO7ghCRerD2d7VQ3X/OsHAsHEuJqQIvELteFaVpSt4Ol5T6uOIixA5lwfmnnxC8zaUMn8TdIe/5eWbKngr9NW4quxf9dYKRK/4Av6uPN/d2KYifFTgGiYeMy5XrTTxcivGwOwqcrBFV815sr/NubhH7P2ncp+6IcsLpvWmP9siCyiV9Vq3DM7O6a5DuvYiwg368GNExaqy2Bxr8/cyLxNu23fukOKxH78IT+/+9/vZHCQ2CfWc65fW57O2B+yqNnzOvLkwkzuPK6ad84qwwS+Lk5hd43GzoDOe2eX8dH6SH+nV5anc2MXb8xyHZY7HfPCf/DEtM34auWHqcO5/d2FVAfsfTUhRWKPQCjA60tel55M4gCxnnPdOjPEuH7l+369rMxFnyaRXUKnN6thdqmbFAeEzEhbELduUlTtwB/S6NRIzYuPOfAhSnwab8zeqOT57aTCH+S3b83DX2vfqwkpEoAZDOI2dUYUXkqbzDaq4wgLifWc63Nb1bD/YDyTn+tRutOkqlYnzWlyZosafj+7Ebd1r+bFpelcfYyPJ+Zn8ucFmfhCcRx/2qQL9Lyaq9+SXX91tWBzOeO+WYPXpusTUiQAIxBg3TnnYU6exqTzPmBE5xGqIwkL2TvnOh72/4b0hjSy3JHbOcM7+Hnp9HJME1pnhvm+1E2v/FpOyKtlysY4HfbTNMwhr/DpsjLW7lB0q8umXvpuHUu3VthyvkbSFwnD72frvX8gVFJC6Z+fZMsdv+PeY2/hzbP+hVuXQ1QivnOuu+QEmVsa+bqbXpJCr/zgAb//xqp0Rh3jJRD6eU5FvK4kzB5XUZvdljsnJvfJ6vowTbj57QW2nJGd1EXC8Pup+HQK1d9+u+993pkzWX/BhXTe7mT64Gl0ze2qLqCwhP3nXMfafT2rGLckg8unNSYYhnNb/dwO5rNNqQxoHsDjhPNaB3h9ZTpvrkrn/NZxaB
njyYFzn+SeSWtJ8PEJMVPmrWX0+Pm2W59I6lbhwa1bWXf+IMyaml//pq6TO3o0uTf+ln+ueYsXFr4Q/4DCEj68aCLHzP03zH1ZdRRlzCGvsiZ/IOc8P1d1FNsbe8GxjOjbGo9NGgEm7ZWE4fez5fd3H7xAABgGu156iaLrruP6FpfyyXkfkO5Mj29IYQn7z7lOSq36YB57Ede8IbeZouGZqauo8MfvttOrr77KaaedRs2e17pnn32WkSNH7nvr2bMn77zzziE/PymLhOH3U/6fSfh/OvKIRf/Cn1h3/iAKVu3gu8FTOaX5KXFIKKxk/znXSUd3YA55hTd/LKWkUjohR0NNyOCO9xbij9P6xKeffsqgQYP47LPPALj77rsZP34848eP57LLLqNdu3YMHTr0kJ+fnEXC62X7M8/U/eMrKym+cTRlz/2DF0/7Ow/1fSiG6ezP2GIQfPvABdfwsjDBN39+X+jzEME3goSXRO7PmgGT0CRrLurtP+c62Zgn3YrXlcujny5XHSWh/LChjCmLS2LeVnzu3Lm0bt2a4cOHM2HChAN+b9myZfztb3/jhRdeICUl5ZCPkXRFwvD72XLvHzB9vqP+3PJ33mXT5ZczOPMUpl04hbzU+Cxm2kn4+zDhz8Ow3+u9UWpg/GREDgEAps/E9Jk4r3FiLDL2fZ5+sjW/HCNzrrNUx4i/rOZwxv3c/MFK1UkS0qOfLscX40XsiRMnMmzYMNq1a4fb7WbRosgtw7KyMu666y6efvppmjVrdtjHsOZ3ZYwYgQBV//0a3/ff1/sxalavYf2FF+GZtYhpF37KoMJBUUxof1qOhnPozwtyps8k/L8wjrP3m/rmBMJECokTzHITakFvYs0vx4raCkjJVB0j7swL/86C4mpmrNmpOkpCqq4JcfcHi2K2LbaiooLp06fz1ltvcf3111NdXc3bb79NOBzmrrvuYtSoUfTq1euIj2OP5fUoMWtq2Pb44w1/HL+fkj/cR9YFg/jz449zTquzuXP6XVFIaH96Zz3yog+Yhkno8xDOs5wHfKVpbg29o07oPyEcpzkIzwzjOMVBaFoINHD0d6C543iK+Ai8QS9BdzpJNVWiw0CMNqdx7ZMzVSdJaP9btZ0Zq3cyoHM+buevx+c2xOTJkxk6dCj33RcZvez3+xk4cCBjx46lVatWXHnllXV6HGv+6BYDhs/H1jF/xKisjNpjVn72ORsuHsypoTZMH/yVtPT4BXObCWUQ+jJE6D8hzJ0moa8iPzU5TnDgGhZ52dVyNIyNBnorHb2ljrHMWhvxvUEvIbdHdYz4caZiDn6Rv31bTKXNm9PZwQOfLKEmBiexJ06cyODBg/f92uPx0L9/fz7++GPWrl17wA6nt99++5CPkxTnJIxgEO/s2RSPvik2T+ByUXDvPWRfeinPLHmeCSsnHPlzEphZbhL6TwjXKNdh3wcQ/CiI82InxkIDLVsDE8xKE0ef6P5U1RAXFF7Ag91Hk/6PHqqjxIU58GF2dbuWXn+ZrTpK0hjcozlPDulOWor1bu4kx5VEOMy2Rx+L3eMHg9LSox7Cy8LoHXU0l4beWSc8N0z4hzD6sdb6skyqOde57eGkm7h2/DLVSZLKpJ+2smRrBWELHme31ndjDBh+P2Xj3ya0dWvMn0taekRojbRfXTEc7H2Org4cx0WuGLQsDdfVLlxXu9AyrbMeAck159oc/BLfrK5gydYK1VGSzpiPl1Abtt6NnYQvEmZNDTtfeiluzxfasYNNI67C++8JTBj4b27reVvcnlvERtLMue42lFB+F25+98iHTEX0rdvh5aP5xTE/O3G0ErpIGF4vpU89Va8zEQ17YmnpkUiSYs51ShbmBc/y0BcbbdnOOlE8PXUlobC1/v4TtkiYpkmwpISKSZOVZZCWHokhHnOuVTPPfoyiSoN3f9isOkpSq/SHeHrqKksNKErcIhEIUPLgg5FG7grta+nxt+elpYdNxXrOtXLNjofjLmekTJ
uzhAlzN7PbV6s6xj4JWSTMYBDvrNn4F1rn3mr5hHfYdPlwaelhQ7Gec62UpmNe8gofLdrBpl1xvi0rDipsmDw8eZllriYSs0iEw2x74gnVMX6lZvVqaelhQ7Gec62SeeK1BDJa8IdPlqqOIvbz9YrtrN/hxQrH2BKuSBiBAOXvf0Bo2zbVUQ5qb0uP7Q89zJ97PczfT/+b6kiiDuI55zpu0vPh7Me486PVMm3Ogsb+Z4kldjolXJHANNn5svUniElLD3uJ55zreDEHPcOK7X6mLi9VHUUcxKLiCmav26V8t1NCFQkjEGD3+x8Q3r1bdZQ6CRYXs+E3QzEnT2PSeR8wovMI1ZHEIcRzznVctDkVs8M5XP2mddbtxK/96bMVhAy1t5wSqkhgmux65RXVKY6OtPSwBV/IB2m5qmNEh8OFOeRlXv1+GzurrbOLRvza+p1eZq/bqbRdR8IUCbtdRfyStPSwtkSac22e8jsqtSye+lKGCdnBs9NWUxtSdzWRMEUCsN9VxC9ISw/rSpg5141aQ7+7+e17Mo7ULpZtrWRRcTmGottOCVEkjECA3e+9b9uriAP8oqXHx+e9T5ozTXWqpJcoc67Ni55nzqZKftiQAN8rSeSZqasIhNTsdEqIIgH2v4r4pb0tPZqu2sn0wdOkpYdiCTHn+pjzMVr05obxi1QnEUdp/qbdrNtereS5bV8kjJoaym28FnE40tLDOmw/59qVhnnxOJ78ejPeGM1UFrH1ly/V9HSyfZEA2PX666ojxJS09FBv75xruzIHjGV7wMk/Z2xQHUXU08y1OympCMT9eW1dJEzDwDtrNqHt21VHiTlp6aGWredc5x8Dva/jmreWqE4iGujpL1dSHeerCXsXiUCAXa++qjpG3EhLD3V8QR+GM1V1jHoxL3mZL1fsYmVpleooooG+WlFKeZw7xNq6SARLSvD/lHwnRqWlR/zZdc61efwVBHM6cPt7slidCEwTnv96bVzXJmxbJMJeLztfTqwdTUdDWnrEly3nXHty4Py/cN+n65Fhc4nj00Vb0ePYtt62RYKwQeWXX6pOoZa09IgbO865Ns/9Mxt2h/hk4RbVUUQU+YNhJi/aErfGf7YsEkYgQNnb4yEYVB3FEqSlR+zZbs51ixOhyyWMlAZ+CenfszYSlCJxGJrG7gnvqE5hKdLSI7ZsNedad2AOeYUJ87ezpTz+WyZF7K3cVvo/Y4IAACAASURBVEVRmT8uz2W7ImEaBtXffUd41y7VUaxHWnrEjJ3mXJt9RuNLacLYSctURxEx9OqM9XFZwLZdkTB8Pna//bbqGJb2y5YeJzc/WXUk27PNnOvMpnDmWG79cJXqJCLGpizeGpcvR9sVCbM2iO/HeapjWN7+LT1eOu0f0tKjgewy59q84DkWbfXy7aodqqOIGAsEDT5ZGPsFbFsVCaM2SPlHH0Y2C4s6kZYe0WP5OdftBmAWnsGotxaqTiLi5I1ZGwmGY/t6aKsiQThExccfq05hO9LSIzosPefamYJ5yYuMm7GFcp808EsWa7ZXs3GXN6bPYasiEdyyhdoNG1XHsCVp6dFwVp5zbfa7h91GGn/77xrVUUSc/Wvmhpj2c7JNkTB8Pna/I9teG0paetSfZedcN24HJ9/G9ROWqk4iFJi6fBsuR+xWsG1TJHA4qPz8C9UpEoK09Kgfq865Ngf/H9PXVbCwqEJ1FKFApT/Eohj+29umSPjmzSNcXq46RuKQlh5HzZJzrrsMJtzkOEZPkJPVyWzi/KKYnZmwRZEIV1dT/u57qmMkJGnpUXeWm3PtzsC88O88OnUDAengl9SmLSvF5YjNy7ktioTmclE9Y4bqGAlLWnrUjdXmXJtnPcJWL4yfs1l1lAbTyjbhmvF/kV/UVOH8/nVc01/A9d3zUL0TAOfCD3B9+3f0zT9GPi7oxzlPDtYCVPiDLNkSm1tOtigSvvkLMGtqVMdIbNLS44gsNee6oBv0uIqRby1WnaTBHKu/wb
nwfQhHGnY6l07BaHUiwdNvI9TlfPTq7VDjhZpqgv3vwLHph8jnrfqacKeBKqNbygfzYnPLyfJFIuz1UjnlU9Uxkoa09Dg0y8y51jTMIa8waelO1u+I7R75eDDTcwn1vXbfr7VdG9D85bhmvoSjaAFGXntwOMEIQzgUadnu3YUWrsXMaqYwubVMW7YNZwx2OVm+SGhOJ9Xffqc6RlKRlh4HZ5U512bPq6nJbM3vP7T/VQSA0eJ4TM2x79earwzTnUbwtJsx0xrhWP0NOFMwmnXF+eN4Qp3PxbnyK0Lt++FY9DGOxf+BkNxp2O0LsnxrZdQf1/JFonbTJsJlZapjJKUDW3p8mvQtPSwx5zqtMZz7J+75z1qMRF2rdqdjNI1soDCadkUrL4r8f+EphE6+HjAxM3LRd6zBzGuPmVuIXrRAYWDrmDivGF+UbzlZukgYNTVUTvlMdYyk9nNLj8VJ39LDCnOuzfOfYfWOAFOWlCjNEUtGbiF66QoA9J3rMTObHvD7jrXfEW7fHy0c3NeVVwvXxj2nFU2NwS0nSxcJwmGq/vtf1SmS3gEtPU58KGlbeiifc936JMxjBnH1W4vUZYiDUPeLcWyeh+u759G3ryR8zFn7fk8vXhi5ynC6Cbc4Hseab3Gsm064xfEKE1vHLm8tm6M8jEgzTeu2VA3u2MHafqerjiH242rZkpYvvYg/P5OR39zApqpNqiPFTdfcrrw28EUy/1IY/yfXnZi3z+dfSw0e/2xF/J9f2MYD53fm+n6FOPXoXANY9krCDIepmjpNdQzxC8nc0kPlnGvz5NuoduRIgRBHNH3NDny14ag9nmWLhOHzUf2/b1THEAeTpC09lM25zmoB/e9j9PtSIMSRzdu4m1Sn48gfWEeWLRJ6aiq+hdKPxsqSraWHqjnX5kXP82NRFbPXyVx3cWQ1IYNV26K3FdayRaK2qAjT51MdQxxBMrX0UDLnuuM5GK1P4vrx8gOTqLtpy0upjVI/L0sWCTMcll5NdpIkLT3iPufa5cEc/AJ//V8xVQGZNifqbuaandSEorMuYckiYfh8+L6fozqGOErJ0NIjnnOuzf73s7PWzUvfrovL84nEsXhLRdS6wlqySOgpKfjmz1cdQ9RDorf0iNuc67yO0Hc014xfEvvnEgknbJj8VBSd+TuWLBLBbdswqqpUxxANkKgtPeI159oc/BL/XbWb5SXyfSDqZ9rybQSCDb/lZLkiYRoG1bNmqY4hoiARW3rEY8612X0YobzO3PpeYp+sFrH1/bpdhIyGn5W2XJEwvF58s79XHUNESaK19Ij5nOvUbBj0V8Z+tj5qu1NEclpdWo07CusSlisSWkoKvnnzVMcQUVb52edsGHwJp4baMH3wV7TJbKM6Ur3Ees61efbjbK4I8/684pg9h0gOYcOkqKzhxwgsVySM6mrCu3erjiFiIBFaesR0znXzntB9GFe9KbeZRHQs2Nzw11LLFYmalStVRxCxZPOWHjGbc63pmJe8zPs/7aBod3S7eIrkNX/Tbny1DTtjY6kiYYZCsvU1Sdi1pUdFbQVmDOZcm72uJ5DWjPs/Xhr1xxbJa9nWSsINXLy2VJEw/H4CS5epjiHixI4tPSIjTKM85zqjCZz1CLd/tDq6jyuS3urSKlJdDWv2Z6kioblcBFZIp8ukYrOWHrGYc20OepZlpX7+u2J7VB9XiJqQQUlFoEGPYakigWEQ2i7fKMnILi09oj7num0/zPYDueZNaeAnYmNxA09eW6pI1G5Kniln4tfs0NIjqnOuHS7MS17i5dkl7PLKjGYRGz9u2o2/ASevLVUk/EukT42wdksPb9CLGaU51+Zpv6dSy+Tpqaui8nhCHMzSLRUEG3Aw0zJFwvD5CCyVnR0iwqotPaI2wrRRGzj1Tq5/Z3nDH0uIw1hZUonHXf/Fa8sUCTMUombdetUxhIVYsaVHtIqEOfgFZm+sZN4mOTgqYstbG25QixfLFAnN5SJYVKQ6hrCgys
8+Z8MlQyzR0iMqc647X0C42Qn89u2F0QklxBFsq6z/DidLFYnQjh2qYwiLChYVWaKlR4PnXLvTMS96nie/2oyvVhr4ifjY3IAeTpYpEuHdu8FseFtbkcAs0NLj5znX9fvWMQc8SKlf5/VZG6KcTIhDW1NahVnP11fLFIng1hLVEYRNqGzp8fOc63rMlGhyLPQaxdUybU7E2cadvnoPILJMkajZKD9ZibpT2dKjvnOuzSGvMGX5LlaXVscglRCHVrTbRzBs4ysJMxSidu1a1TGE3Shq6VGfOddmjxEEswu5631pAy7ir3i3H4eu1etzLVEkjECA4JatqmMIm4p3S4/InOujKBKeHDjvKe6dvA4ZNidU2LLbT6qrfi/3ligSmCa1xTKJS9RfPFt6ROZcN67zx5vnPcW6slom/SQ/CAk1asMGVYH6zZWwRJHQXC6CW7aojiESQDxaehzVnOuWvTGPvZiRb8htJqHWtnp2g7VGkXC7CZeVqY4hEkSsW3rUec617sAc8grj55VS0oDDTEJEQ9Hu+p2VsESRMAMBOSMhoiqWLT3qOufa7HszXnceD0+W/kxCvV3V9es0bIkiYXi9qiOIBBWLlh51mnOd2QwGjOHWD2Rmu7CGndU19TpQZ4kiEa6sUh1BJLCfW3p8FZWWHnWZc21e+HcWbvHy3ZqdDXouIaKl3B8kGD767XXWKBIVDZucJMQRBYOU/vnPUWnpccQ51+0HYrbtx6g3pYGfsI4Kf7BeB+qsUSR2S7tkER/RaOlx2DnXzhTMwf/H378rprKeWw6FiIVKf4iwYdMiEdq5S3UEkUQa2tLjcHOuzdP/QFk4lee/kQ4CwloqA8F6fZ7yImGaJqGd0iJcxFkDWnoccs5143Zw8i1cN2FZlMMK0XCVfrsWidpawuWyJiHUqE9Lj0PNuTYveYn/ralgUXFFLKIK0SAV/mC9+jepLxKhEOGKStUxRBI72pYeBx1h2nUIofyu3PLOTzFMKkT9VfqDuBxH/5KvvEhgmpi19TvkIUQ01bWlx6+KREom5gXP8fCXGwlIBz9hUVU1IZwOG15JYJpg1G8YhhDRVpeWHr+cc22e9SjF1fDO3M3xjCrEUTFNCNbjhxj1RQIwQ1IkhHUcqaXHAXOumx4Hx1/B1W9KAz9hfYYtT1ybJoSlSAjrOVRLj5/nXEca+H28eCcbdtV/0LwQ8VKPYxIWKBKAKUVCWNTBWnrsnXNtnnoHNRktufdjmVkt7KE+VxLOI39I7EmREJa2p6VH9fTp3Pvcs5zTciC14Vqc/e/nrveXYchatbCJ+jTb1sz6tAWMonBlJcW33Ybvhx9VxhCiTpz5+bR4/h+kHnssQU1nydZq1ZGEqLPjW2XjcjqO6nOscSUhC9fCJkI7drD96adp8cbbLPxyo+o4QhwVrcWR56D8kiWKhFyvCzvJv+8BVs4pYf4Xm1RHEeKo9Dy7NU730V1JWGLhWgi70LOySOnSlUX/LVIdRYijpml2PEwHaCkpqiMIUScF999P6YYKKnb4VUcR4ugdfY2wQpHQ0NMO0ZtfCIvxnHM+87+Uk9XCnux5JaFr6J66tWgWQqVGV11FbS0UrShTHUWIetHs2AVW03W01IMPcBHCShpdfyMLp8litbAnTddwOG1YJNB1ud0kLC/1+ONxNs5h5ffbVEcRol7cqQ7CIRv2btKcTnSPFAlhbU3G/JGVc7YRrJEzPcKe3B4nRtiGXWA1pxM9LV11DCEOSc/OJuXYLrLtVdhaSpoTsx4d/pQXCQA9I0N1BCEOqeD++2Tbq7C9FE/9zk5bo0hkSpEQ1uU5W7a9Cvtzpzntek4CnDmNVUcQ4qByRsq2V5EYUjwum56TAJwFBaojCHFQ2dePZsFU2fYq7C8lzYluyxnXgDNXriSE9aT26IEzpxGr5si2V2F/KWlOHM6jf8m3RJHQs4++fa0QsZb/wBhWfi/bXkVi8GS67Xu7SdM09HRpzSGsQ2/UiNRju/CTbH
sVCSKjUf0aqVqiSBg1tTibNFEdQ4h9Cu6LbHut3CnbXkViyMqv36FlSxQJjDDO/HzVKYTYx3P2ecz7UhasReJIt/OVBLouRUJYRs7IkdTWmBSv2K06ihBR4XDpuFKObiLdXpYoEprbLUVCWEb29Tcyf5ocnhOJI7NxKqHa+m3AsESR0N1unM2bq44hBKk9I9teV8u2V5FAMnNT69W3CSxSJABSO3ZUHUEI2fYqElJm41R0R/1e7uvX8SkG3IWFqiMoETJNxmGyHQgBw9DIBV7GxAkUAjegoWsaL5oGG4Hz0RigaXhNk1cxuUuzTK23Nb1RI1I7d+Gnx35QHUWIqMrO9+B027xIOPPywOmEUEh1lLj6DsgE7tJ0Kk2T32OSDfwWjc6axgTTYDpwgmlSDjyFxoOYDEDjI0x+U5+OXeKgCu6/j23rZdurSDw5zdLrdZAOLHS7yaipwd2ypeoYcXcKMGK/F3oHsAvovOcftDMaKzBxA2EgCLiBUtMkALSp5z+8+DXPWeczX7a9igSUnVf/EdGWKRKEDdyFbVWniDuPpuHRNPymydOYXIlGAbDUjCwy/UikGKRqGn3QeBaTy9H4AJOL0HjNNHjdNAiY9VuUEhE5V4+ktsageKVsexWJJz0nAYqElpqKu7Cd6hhK7DBNxmJyBhr9NY079txKetw0yAay9nzcuZrGmD3rD03RWAx0ReNYNKarCp8gsq+Tba8iMaWkO3G66v9Sb5kiobtdpHbtojpG3JWbJo9icjUaZ+25dTQPuB2NBzWdKuD4X6w7TMLkYqCGn/8BA3HMnGhSe/aUba8iYeW1zKz3GQmw0MI1QEqnTqojxN2HmFQDH2DywZ5bRoPReAyTFNOkO9Brv3WHGaZJbzRSNI1TTJO/YqIB98gCdr3lPzCGFbNLZNurSEh5LTNwuOt32hosViTcLVqojhB3N2g6Nxzk/X0O8aLfb7+CkadpPCXFoUEi216PZdFjc1VHESImmnXIxlmPORJ7WeZ2EwAOB468PNUpRBIpuP9+StZVULlTbtiJxJTfOrNBn2+pImHW1uLp3k11DJFEPGedJ+NJRcJyOPV6d3/dy1JFQk9Lw9Ojh+oYIknkXHO1bHsVCS2nWRqhWqNBj2GpIqE5HKSfdJLqGCJJZF97I/PlKkIksPxWmTT0vK2ligTs2eEkp4hFjHlO6ImzUTarZNurSGAFhVm4Uxu2P8lyRQLDwN22jeoUIsHl3x/Z9trQS3EhrKygMOvIH3QElisSJuA57njVMUQC03NySOl8LIu+KVIdRYiY0XWNRgVpDX+cKGSJKkd6Op7evVTHEAlMtr2KZJDfJhMj1PCebpYrEgBpJ0qRELGTdta5LJBuryLBteycg6MBPZv2smSRcLdsgeZ2q44hElDjUdcQCBgUr5JtryKxte2eh6MBJ633smSRMAIBPD1kXUJEX9aoG+QqQiQ83aGR16phJ633PVZUHiXKdI+H9FNPUx1DJBjPiSfibNSIVXNl26tIbE3aZBIORWfnniWLhOZ0knHmANUxRILJv/8B2fYqkkLLzo0bNENif5YsEgApbduip6erjiEShJ6TQ8oxnfnpa9n2KhJf2+Oisx4BFi4SRiBAWp8+qmOIBFFw/wOUrCunapdsexWJTXdq5LXIiN7j1eWDioqKuP322xk5ciTDhw/nkUceobq6mnHjxvHuu+8e8LGXXXYZxcXFDQ+Wnk6m3HISUZJ21jnM/1LGk4rEV9A2i1CU1iOgDkOHAoEAt9xyC0888QTHHx/ZcfTJJ59w9913061b7Np6a7pOxgApEqLhGo8aRcBvsEW2vYok0OrY6K1HQB2uJL799lt69+69r0AADBkyhN27d1NUFNv7u3p6Ou62bWP6HCLxZV97g8yMEEmjU++CqK1HQB2uJIqKimjduvWv3t+yZUtKSkpYtGgRn3/++b73r127NmrhANL79aN248aoPqZIHp4TT8SRnc2quUtURxEi5jJzUxs8ZO
iXjlgkCgoKWLx48a/ev3HjRjp06MCgQYO44oor9r3/sssui1o43eMh+4JB7B4/PmqPKZJL/gMPsHyWbHsVyaFdj/yoP+YRr0kGDhzI7NmzDygUEydOpHHjxrRq1SrqgX4p5dhjcTRqFPPnEYlHz8khpVNn6fYqkkbnk5vidDui+phHLBLp6em8/PLLvPjiiwwfPpxhw4axaNEinnvuuagGOaRQiMyzzorPc4mEUvDAA2xdK9teRXLwZLrIKYj+2TLNNM2G95KNMd/ChWy64krVMYTNtJ//E5+/ulx2NYmk0OW05pw2rAOulIZNovslyx6m219ql65yy0kclcbXXivbXkVS6Xxy06gXCLBJkTBDQTLPPlt1DGEjWaOuZ75sexVJwu1x0qR1w0eVHowtioQjPZ3sob9RHUPYhKdXL5zZ2ayWbq8iSbTtnks4HJsdfLYoEgCpXbrILSdRJ/n3y7ZXkVyOOakp7tTo32oCGxUJMxSSW07iiPTcXFI6HSPbXkXSSEl30rxj7H6Atk2RcKSl0WjYMNUxhMUV3H+/bHsVSaVz36aYMbxotk2RAEjp1BFXixaqYwgLSzvzbBlPKpLKcWe2wpUS3QN0+7NVkUDXabRfCxAh9tf4uusi215Xl6uOIkRcNGmbiSfDFdPnsFWR0N1uci4bBs7YLNAIe8u65jrmy1WESCLdz2iJwx3bl3FbFQkAdJ3MM85QnUJYjKe3bHsVycXp1ml/QhN0XYrEARwZGTQedY3qGMJi8u8bE9n2GpRtryI5dDixCaYR+65KtisSAKnduuFs3lx1DGERkW2vnWTbq0gqx5/ZKmZnI/ZnyyKBppFz+eWqUwiLKHjgAbaukW2vInk0KkgjuyAtLs9lyyKhp6SQM/xycMRu25ewj7QBZ0mfJpFUup3eAl3X4vJctiwSADgcZPTvrzqFUKzx9dcT8BtslW2vIkm4Uhx0Oa15VOdYH45ti4QjI4P8225VHUMoli3bXkWS6dqvBRC/MUC2LRIA7rZt8fTooTqGUMTTuzd6VpZsexVJQ3donHhem5jMjTjkc8btmWJAS00l/47bVccQiuTfP4blM7fKtleRNDr2KkB3xmctYi97Fwldx9OzJ+527VRHEXGm5+aS0rEji78pVh1FiLjpc3FhXLa97s/WRQIAl4u8W29RnULE2b5tr2Wy7VUkhzbdcklNj22fpoOxfZHQnU4yBw7EWVCgOoqII9n2KpJNn4vifxUBCVAkANB1cm+4QXUKESeNr78evy8s215F0ihom0VO03Qlz50QRUJ3u2l06VD0rNgMAhfWknXNdXIVIZJK7wsLcbjUvFwnRJHYq/HVV6uOIGLM06cPjqws1swtVR1FiLhoVJBGi06N4nbC+pcSpkjoHg+5112Lo1HsZr0K9fLve0C2vYqkcsrQDugONQUCEqhIAKDr5N12m+oUIkZk26tINnmtMmjZOQfdoe6lOqGKhJ6aSqNLh+Js1kx1FBEDBWPGsGX1btn2KpJGv8s6xq1H06EkVJEA0BwOmtxzt+oYIgbSzhjIgqmbVccQIi6adcgmv3WWsrWIvRKvSLhcZA4ciLt9e9VRRBTl3nBDZNvrGtn2KpLD6Zd3whnj+dV1oT5BDGguFwV/HKM6hoiizKuvlW6vUWSYBv9d8RYT5z/Dhwuepdy/Y9/vTV8zkSVbpu/79TerJvD+/L+wYtscAGpCfqYu/3fcMyeTdj3yyc73oGlqryIgUYuEw0Faz554ekqH2ESQ1rcvjqwsVv8g216jZcPOxQAMO/FeTiq8iBlrP8RXW8WkReNYv+f3APzBany1VVx2wr0sL5kNwLxNX9Kr9TlKcicD3aFx+vBOuBScrj6YhCwSEOkQ23Tsg6pjiCjIu+9+ls/cSli2vUZN+/wenHnMCACqArtIc2URDNfQt+2FdG7ad9/HOXUXhhEmZIRw6C4q/DsJhmvJzWihKnrC69a/Ba5U60zdTNwioWm4C9uSea78xGNnem
4uKR06sujrItVREo6uO5i24g2+XfMBHZr0JNuTR9PswgM+xuVIoTDvOL5c/jp9217Ajxs/p0fLAXy3+n2mr5lIMFyjKH1icqc66HtROyU9mg4lYYsEgJ6WRtNHH0VPj8/AcBF9Tfdse63eLS9GsXDOsaO4uu8jfLNywiFf8Lu36MdF3W8GTLI9+RTtXkXzRh1plt2eVaU/xjdwglN9cO5gErpIQOTsRP7v7lQdQ9SHruM5YyDzv5Rtr9G2Yttcftz0JQBOhxtN09CO8HKwoOhrerQaSMioRdd0NJAriSjKb51Jp75Ncbqtc6sJkqRINBo2jJROHVVHEUcp94ZIt9eStbLtNdo65PdgR1URHy54lkmLxtGvwzCcjkPPKlhd+iPtcrvjcrjpkH8CCzZ/xU/F39CxyYlxTJ24NF3j7Ou64FTUxO9wNNM04zdRWxHTMKhZtYoNvxkKif/HTRiF02cz+8ttrJhdojqKEDF1/MBW9L24MK6zq+vKemUrBjRdx926NdmXXKI6iqijtJNOQs/KZPWPsu1VJLb0Rin0vbidJQsEJEmRANDT0ykY8wB6drbqKKIO8v5wP8tmbJFtryLhDbiqM7rTWovV+0uaIgGgud0U3H+f6hjiCBz5eaR06CDdXkXCa9M9l+YdG+FQ2OX1SKybLAb0lBSyzjsPT8+eqqOIwyh4QLa9isTndOsMvPpYXCnW2s30S0lVJCAynKjFP/6OliZnJyxJtr2KJHHSJe0tXyAgCYsEgCMri6Zj/6g6hjiI3Buux18dkm2vIqEVFGbR9bTmljsTcTBJWST01FSyzj+f9H79VEcRv5A1Urq9isTmSnFw/k3dbVEgIEmLBOy57fTXZ2QmtoWknXwyeqZsexWJbcBVnUnxWHO768EkbZEA0Dwemv35z6pjiD3y/nCfbHsVCa3DiU1oe1yeba4iIMmLhO52k35SX7IuuEB1lKTnyM8jpb1sexWJKyMnhQEjO9tisXp/SV0kYE+n2McexdmkieooSa1gzB/Zskq2vYrEpGlw3ujuOCzYm+lI7Jc4BjS3mxbP/wN0+etQQtfx9D+T+VNlwVokphPObUPjZumWPjR3KPZLHAO6y0Vqp07k33WX6ihJKfe3N+CrDlGytkJ1FCGiLr91JicOamu720x7SZHYQ09Lo/FVI8jo3191lKSTddUoFsi2V5GAnG6dQTd3t2QL8Lqyb/IY0D0emj/7V1wtW6qOkjTSTj4ZPUO2vYrEdO4N3UhNd6Fp1m3gdyRSJH5B93ho9dqraCkpqqMkBen2KhJVrwva0uKYHFttdz0YKRK/oDkcuJo1o9kTT6iOkvAi217bs/h/su1VJJY23XI54dw2tl2H2J8UiYPQU1PJHHgm2cOGqY6S0ArGjKVYtr2KBNOoII1zbuiKy+ZXEHtJkTgEPS2NpmMeILVrF9VREpOu4+k/gAWy7VUkEHeqg4t/18P2t5j2J0XiMLTUVFq9+iqOvDzVURJO7m9/K9teRWLR4PybuuPJdKHr9l2o/iUpEoehaRp6Vhat//0vtNRU1XESStZV1zD/C7mKEInj5EvaU1CYjdOVOFcRIEXiiHSXC3erVrT4x98jZ+tFg6Wdcgp6RiZrZNurSBDtT8in+xktE2Kh+pekSNSBnppKeu/eNLnvD6qjJIT8e+9j6fQthEOy7VXYX0FhFgOv6ZKQBQKkSNSZnpZGzmWX0+jKK1RHsTVnkya4ZdurSBCNm6Vz8R09ErZAgBSJo6KneSi4914yzjxTdRTbajJmDMWrduMtl22vwt4yclK45O6eCV0gQIrEUdM9Hlo8+1c8PXqojmI/uo7n9AHSp0nYXmq6i9/ceyIpHidaAu1kOhgpEvWwt3WHu1071VFsJffGG/FVBSlZJ9tehX25UhwMubsnaVludBu2/j5aif8njBE9PZ2270zA1aaN6ii2kXXVNcyXqwhhY7pD46Lbjycr34PDmRwvn8nxp4wBTdfRs7Jo+9670jW2Dt
JPPRU9PYM1P25XHUWIetE0OO/GbuS1zky4sxCHI0WiATRdx7GnUDibN1cdx9LyZNursLn+I46hZefGCdOTqa6kSDSQ5nDgaNQoUihkTvZBOZs0wd2unWx7FbZ1+hWd6NS7acLvZDoYKRJRoDmdOBo3pu0H8d87xwAAC4RJREFU70ufp4No8scxFK+Uba/ChjQ48+rOdD6pWVIWCJAiETW604kjN5e2772HIydHdRzr0HU8/QYwX7q9CpvRNDj7ui50OLEgaQsESJGIKt3lwlXQhLbvvYujcWPVcSwhd/RofJVBtsm2V2Ejuq5x3o3dKDwuP6kLBEiRiDrN5cLZrBmFH3+Mq4UsZmeNuFquIoSt6A6NQbceR6suuUlfIECKREzobjfO/DzafvQRKZ06qo6jTPppsu1V2IvDqXPRHT1o3rGRFIg9pEjEiOZw4MjOps077+Dp2VN1HCXy7pFtr8I+nG6dwXf1oGlhVtJtcz0cKRIxpGkajowMWr/+TzL691cdJ65+3vZapDqKEEeUkuZkyN0nkN8qM6FGj0aDFIk40NPSaPH3v5E1+GLVUeKmyR//SNHKMrzltaqjCHFY2U08DH+wD7nNM6RAHIQUiTjRPR6aPfIIja+9VnWU2NN1PP3OYMHUzaqTCHFYzTpkc9kDvUnLduNwycvhwcjfShzpHg/5d9xOwSMPgyNxf2LJHT0ar2x7FRZ3TN+mXHRHD9weJ7ouL4WHopmmaaoOkWwMv5/AypUU3XQzRkXivZAWzvyemVO2smrONtVRhDioky5pz3EDEnMmdbRJ+VRA93hI7dqVdpMn4W7fXnWcqNq77XXtPNn2KqzH4dQ5f3Q3KRBHQa4kFDINAzMQYMvdd1P9v29Vx4mKNpM+ZcUGJ3P+s151FCEOkJrh4uLf9aBRQZpscT0KciWhkKbrkZ1Pzz1H7s03qY7TYM6CJrgLC1nyrXR7FdaS1yqD4Q/2oXGzdCkQR0muJCzC8Pnwzp7NlnvuxQwEVMepl+bPP09ZQQ8+f2mJ6ihC7NOtfwtOGdoBp0tH0xJ7HnUsyJWERehpaaSfdhqFn3yMu21b1XGOnq7jOa0/C6RPk7AIV6qD82/qzim/6YDL7ZACUU9SJCxET03F3bo1hR9/RPYll6iOc1Ryb7opsu11faXqKEKQ2yKDKx/uS+uujWWBuoHkdpNFGT4f1TNmUjJmDIbXqzrOERXOnMPMT7ewaq5sexVqde3XnFOHdZTbS1EiVxIWpaelkXFGf9p98Tmp3bqpjnNY6f1OQ09PZ838UtVRRBJzpTg4f3Q3Tr20o9xeiiK5krABw+9n5/+9yK7XXwcL/nO1nvQpK2Xbq1Aot0UGF9x6HJ4Ml/RfijK5krAB3eMh75abaTP+LRy5uarjHMBZUECKbHsVimi6Rq8L2jL0vhPJaJQiBSIGpEjYhJ6Whuf442k/9UuyLrpIdZx9mowdS9EK6fYq4i+naRqXj+3NCee0idxe0uX2UizI7SYbMnw+/EuWsvW++whtU7hQrOu0n7+Qyc8vpnSD7GoS8aFp0OOc1vS+oBCHU0eX4hBTciVhQ3paGp4TT6D955+RM+LKyHeNAnk334y3PCgFQsRN4+bpXP5gH3oPKsTldkiBiAO5krC5sM9H7caNbL37bmo3bIzrcxfOmsPMybLtVcSe7tToc2Ehx5/ZCodTl1tLcSRXEjbnSEsj9ZhjKPz4E/JuuQWczrg8b3q/fmhpabLtVcRcs/bZXPXoSRw3oBVOWXuIO7mSSCCGz0do505Kxo7F98OPMX2uNpM/ZcU6J3MmybZXERsZOSmcPrwTLTvLqWmVpEgkIMPnwzd/Adsee4xgUVHUH99ZUEDhV18z/sHv8VXIriYRXU63Tq9BbTn+zFZoDg2HQ254qCRFIkEZoRCEQpS//wE7xo3DqK6O2mM3f2EcZXnH8fnLS6P2mEKgQafeBfS7rBMOty4tvS1CikSCMwIBzGCQ7c8+S/nEDyEcbtgDOh
y0n7eQyc8vkl1NImoKCrMYcFVnsnJTcaXGZ11N1I0UiSRh+HyEdu2i5MGH8M2ZU+/HybvtNpxDR/HOo3OjmE4kq/RGKfS7rCOtu+VKQz6LkiKRZAyfj8DKlWx/5hn8C3866s8vnDWHGZO3sFq2vYoGSMty02tQW449pRmaruFwyrqDVcl1XZLR09Lw9OxJ69dfJ7BqdaRYLFhQp89N798fLS2NtbLtVdRTeiM3vQYV0vmkpqCB0yXrDlYnVxJJzDRNTL+fwOo1kWIxf/5hP77N5CmsWOeQba/iqGXkpNDrgrYc06cpaBpOV3yuHJ566imWLVvGjh07CAQCtGrViqysLFauXMnEiRNp3LgxXq+Xq666iieffJLOnTvHJZedSJEQ+4pFzdq1lD79DP558371Mc6mTSmc9l/Z9iqOSkZOCn0uakfHXk2U3lb6+OOPWb9+Pffccw8AEyZM4Ntvv+XVV1/lzjvvpF+/flx66aVKslmd3G4SaJqGlpZGavfutH71FWrWrWPHuBfwzpixb35FwdixbF6+SwqEqJOsvFT6XFhI+xOboGnWW3MYMWIEs2fPZvTo0eTm5kqBOAwpEmKfvcXC0707Lf72HEZlJTtfeZWKKVNIPfV0Fjy/SHVEYWUatO7SmBPObUNB2yzLL0iPGDGCa6+9lnfffVd1FEuTIiEOypGejiM9nSb33kPBmAcIhjV8lXIVIX4tJc3Jsac0p8fZrXClOHDb4JxDZWUlf/rTn3j00UcZO3YsEydOJD09XXUsS7L+v6ZQyrHnG8cRMrjy4b5s21DBwqmb2byiDGQ1K6nltsig5zmtaN+zCaaJrforPfDAA4wYMYLhw4dTUlLCo48+ytNPP606liVJkRB14txz26DlMY1p0iaLYE2Yxf/f3r3ttHFFYRz/z54Zn8EO4BgIkISmSajai6Sq1PICveUanoFX4hl4gkpV1ZtKLVKF2iYNJG04OAFsDj7gmdkzvXAa5caqGgzGyfeT5tZeGo38afZee/m7HZ7+VKVR7wy4OrkqxnWYf1Tmy29vU6zkcF0HM2SzldbW1jDGsLy8DMDq6iorKyusr6+ztLQ04OquH3U3yXuLAgsO1PaabH6/y9bGAUE7GnRZ0meOA1OflvhscYr5R2WShKFYUpL+UEhIXwTnEcY17D2ts/nDLn9tHhFHerSGWXluhIeLU9z/qoIxDl7aYMxwvTXIxSkkpO+CdoRjHLY3XvPbj/vsPTvW/sWQKFVyPPh6koXFKfy0i+eboVtOkv5SSMilieOYKIhJ4oQXm0ds/3LAyz9qhOcXnEQr/ePAxEyBO59P8OCbSfKlNOaat67K1VJIyJVIkoTg3OL5hqOdBs9+fsWLzSPq+61Bl/bRyeR9ZhfGmH9cZnZhDMcB45m3zQki71JIyEBEoSWJu5vfz389ZHvjgN0ndaIwHnRpHxzHONy8PcKdLyb45HGZ0fEsNopJZbX5LP9NISEDF8cJYaf7lnH8usXukzp7fx6zv3WiMSDvwfUN5bkRKndHmV0YY/peiSROcFNGfwUq/5tCQq6df0PD9RzCjuXV81Ne/l6junXC4U6D2OqRfVfxZpbJu0Wm75eYvldiZDxDFMa4nqNR3HJhCgkZClFgiW2C6xvq1RbVrWMOdxrUqy3q1Sbts3DQJV464zmUyjlKlRzluQIzD8cYv1UAuns+Orsgl0EhIUMr7FhiG+P6LkmccHrY5mi3wcHfZ9SrLWr7Tc5q50PXfpsvpShV8pQqOcZv5ZmYKVAs58jkvW63GAl+ylVrqlwJhYR8cKIoxgYW4xqM69BpRbQbAc3jgLNam7PDc5onAa2TDq3TgOZJh3YjvPQw8dMumYJPtuCTHU2RL6bJl1IUJ7IUxjIUbmQo3EiTxAk2ijGewU9puUgGSyEhH53YJm+6qxJwnO78Ic8QdSzWxtgoxoYJNrREYfesRxRYwsASdSxBp3vOw/MNnu/iprrto65vcL03l2/efm4q4+KnPZ
Kk++Pf/V4wxuClDI7jDPiOiPSmkBARkZ60qCkiIj0pJEREpCeFhIiI9KSQEBGRnhQSIiLSk0JCRER6UkiIiEhPCgkREenpH+z77mfEKmDcAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#a visualization of each race by state\n", + "for race in races:\n", + " values= top5_race_pc[race]\n", + " labels= top5_race_pc.index\n", + " plt.axis('equal')\n", + " plt.title(race)\n", + " plt.pie(values, labels=labels, radius=2, autopct='%0.0f%%')\n", + " plt.show();" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Blacks have a generally higher percentage of killings in each state compared to other races.**\n", + "\n", + "**TX has a large share of killings of Native Americans and Blacks.**\n", + "\n", + "**CA has a large share of killings of Whites, Hispanics and Asians.**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Profile of a person killed by police" + ] + }, + { + "cell_type": "code", + "execution_count": 263, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamedatemanner_of_deatharmedagegenderracecitystatesigns_of_mental_illnessthreat_levelfleebody_camera
03Tim Elliot2015-02-01shotgun53.0MASheltonWATruehighNot fleeingFalse
14Lewis Lee Lembke2015-02-01shotgun47.0MWAlohaORFalsehighNot fleeingFalse
25John Paul Quintero2015-03-01shot and Taseredunarmed23.0MHWichitaKSFalsemediumNot fleeingFalse
38Matthew Hoffman2015-04-01shottoy weapon32.0MWSan FranciscoCATruehighNot fleeingFalse
49Michael Rodriguez2015-04-01shotnail gun39.0MHEvansCOFalsehighNot fleeingFalse
\n", + "
" + ], + "text/plain": [ + " id name date manner_of_death armed age \\\n", + "0 3 Tim Elliot 2015-02-01 shot gun 53.0 \n", + "1 4 Lewis Lee Lembke 2015-02-01 shot gun 47.0 \n", + "2 5 John Paul Quintero 2015-03-01 shot and Tasered unarmed 23.0 \n", + "3 8 Matthew Hoffman 2015-04-01 shot toy weapon 32.0 \n", + "4 9 Michael Rodriguez 2015-04-01 shot nail gun 39.0 \n", + "\n", + " gender race city state signs_of_mental_illness threat_level \\\n", + "0 M A Shelton WA True high \n", + "1 M W Aloha OR False high \n", + "2 M H Wichita KS False medium \n", + "3 M W San Francisco CA True high \n", + "4 M H Evans CO False high \n", + "\n", + " flee body_camera \n", + "0 Not fleeing False \n", + "1 Not fleeing False \n", + "2 Not fleeing False \n", + "3 Not fleeing False \n", + "4 Not fleeing False " + ] + }, + "execution_count": 263, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "killings.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 264, + "metadata": {}, + "outputs": [], + "source": [ + "race_avg_age = killings.groupby('race')['age'].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 265, + "metadata": {}, + "outputs": [], + "source": [ + "race_avg_age.name = 'average_age'" + ] + }, + { + "cell_type": "code", + "execution_count": 266, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "race\n", + "A 36.538462\n", + "B 31.669903\n", + "H 33.018913\n", + "N 30.451613\n", + "O 33.071429\n", + "W 39.942693\n", + "Name: average_age, dtype: float64\n" + ] + } + ], + "source": [ + "print(race_avg_age)" + ] + }, + { + "cell_type": "code", + "execution_count": 267, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "34.11550197750503" + ] + }, + "execution_count": 267, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "race_avg_age.mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 268, + 
"metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "M 2428\n", + "F 107\n", + "Name: gender, dtype: int64" + ] + }, + "execution_count": 268, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "killings.gender.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 269, + "metadata": {}, + "outputs": [], + "source": [ + "males = len(killings[killings['gender'] == 'M'])\n", + "females = len(killings[killings['gender'] == 'F'])\n", + "total = killings.shape[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 270, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(2428, 107, 2535)" + ] + }, + "execution_count": 270, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "males, females, total" + ] + }, + { + "cell_type": "code", + "execution_count": 271, + "metadata": {}, + "outputs": [], + "source": [ + "males_pc = round((males / total) * 100)\n", + "females_pc = round((females / total) * 100)" + ] + }, + { + "cell_type": "code", + "execution_count": 272, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(96, 4)" + ] + }, + "execution_count": 272, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "males_pc, females_pc" + ] + }, + { + "cell_type": "code", + "execution_count": 273, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.axis('equal')\n", + "plt.title('Gender Percentage of Killings')\n", + "plt.pie([males_pc, females_pc], labels=['male', 'female'], radius=2, autopct='%0.0f%%')\n", + "plt.show();" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**The average profile of a victim killed by police is a Black male, aged 34 years and most likely living in TX.**\n", + "\n", + "**The average profile of a victim killed by police is a Hispanic male, aged 34 years and most likely living in CA.**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Justification of killings" + ] + }, + { + "cell_type": "code", + "execution_count": 274, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.swarmplot(data=killings,\n", + " x='age',\n", + " y='flee',\n", + " hue='manner_of_death')\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Victims of police shootings, mostly between the ages of 20 and 45, were either not fleeing or fleeing by car or on foot.**\n", + "\n", + "**Victims aged 50 and beyond were mostly shot when not fleeing.**" + ] + }, + { + "cell_type": "code", + "execution_count": 275, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.countplot(data=killings,\n", + " y=\"manner_of_death\",\n", + " hue='threat_level')\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Majority of victims who were shot were a high threat level.**\n", + "\n", + "**Victims who were shot and tasered were both high and medium threat levels.**\n", + "\n", + "*It can be seen that majority of the killings were justified as victims who were high and medium threat levels.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Level 1/Intermediate/US Police Killings Analysis.ipynb b/Level 1/Intermediate/US Police Killings Analysis.ipynb new file mode 100644 index 0000000..a67c033 --- /dev/null +++ b/Level 1/Intermediate/US Police Killings Analysis.ipynb @@ -0,0 +1,5360 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- Import required libraries." + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "import warnings\n", + "warnings.filterwarnings('ignore')\n", + "\n", + "import re\n", + "import seaborn as sns\n", + "sns.set_style(\"whitegrid\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- Open and read the data sets." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": {}, + "outputs": [], + "source": [ + "income = pd.read_csv('datasets/MedianHouseholdIncome2015.csv', encoding='windows-1251')" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "metadata": {}, + "outputs": [], + "source": [ + "poverty = pd.read_csv('datasets/PercentagePeopleBelowPovertyLevel.csv', encoding='windows-1251')" + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "metadata": {}, + "outputs": [], + "source": [ + "education = pd.read_csv('datasets/PercentOver25CompletedHighSchool.csv', encoding='windows-1251')" + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "metadata": {}, + "outputs": [], + "source": [ + "killings = pd.read_csv('datasets/PoliceKillingsUS.csv', encoding='windows-1251')" + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "metadata": {}, + "outputs": [], + "source": [ + "city_race = pd.read_csv('datasets/ShareRaceByCity.csv', encoding='windows-1251')" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Geographic AreaCityMedian Income
0ALAbanda CDP11207
1ALAbbeville city25615
2ALAdamsville city42575
3ALAddison town37083
4ALAkron town21667
\n", + "
" + ], + "text/plain": [ + " Geographic Area City Median Income\n", + "0 AL Abanda CDP 11207\n", + "1 AL Abbeville city 25615\n", + "2 AL Adamsville city 42575\n", + "3 AL Addison town 37083\n", + "4 AL Akron town 21667" + ] + }, + "execution_count": 94, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "income.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "51" + ] + }, + "execution_count": 95, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "income['Geographic Area'].nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": {}, + "outputs": [], + "source": [ + "city_samp = city_race.sample(frac=0.25, replace=False, random_state=0, axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Int64Index: 7317 entries, 20414 to 22006\n", + "Data columns (total 7 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Geographic area 7317 non-null object\n", + " 1 City 7317 non-null object\n", + " 2 share_white 7317 non-null object\n", + " 3 share_black 7317 non-null object\n", + " 4 share_native_american 7317 non-null object\n", + " 5 share_asian 7317 non-null object\n", + " 6 share_hispanic 7317 non-null object\n", + "dtypes: object(7)\n", + "memory usage: 457.3+ KB\n" + ] + } + ], + "source": [ + "city_samp.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 98, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "city_samp.index.duplicated().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + 
"execution_count": 99, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 29322 entries, 0 to 29321\n", + "Data columns (total 3 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Geographic Area 29322 non-null object\n", + " 1 City 29322 non-null object\n", + " 2 Median Income 29271 non-null object\n", + "dtypes: object(3)\n", + "memory usage: 687.4+ KB\n" + ] + } + ], + "source": [ + "income.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Geographic AreaCitypoverty_rate
0ALAbanda CDP78.8
1ALAbbeville city29.1
2ALAdamsville city25.5
3ALAddison town30.7
4ALAkron town42
\n", + "
" + ], + "text/plain": [ + " Geographic Area City poverty_rate\n", + "0 AL Abanda CDP 78.8\n", + "1 AL Abbeville city 29.1\n", + "2 AL Adamsville city 25.5\n", + "3 AL Addison town 30.7\n", + "4 AL Akron town 42" + ] + }, + "execution_count": 100, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "poverty.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 29329 entries, 0 to 29328\n", + "Data columns (total 3 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Geographic Area 29329 non-null object\n", + " 1 City 29329 non-null object\n", + " 2 poverty_rate 29329 non-null object\n", + "dtypes: object(3)\n", + "memory usage: 687.5+ KB\n" + ] + } + ], + "source": [ + "poverty.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Geographic AreaCitypercent_completed_hs
0ALAbanda CDP21.2
1ALAbbeville city69.1
2ALAdamsville city78.9
3ALAddison town81.4
4ALAkron town68.6
\n", + "
" + ], + "text/plain": [ + " Geographic Area City percent_completed_hs\n", + "0 AL Abanda CDP 21.2\n", + "1 AL Abbeville city 69.1\n", + "2 AL Adamsville city 78.9\n", + "3 AL Addison town 81.4\n", + "4 AL Akron town 68.6" + ] + }, + "execution_count": 102, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "education.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 29329 entries, 0 to 29328\n", + "Data columns (total 3 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Geographic Area 29329 non-null object\n", + " 1 City 29329 non-null object\n", + " 2 percent_completed_hs 29329 non-null object\n", + "dtypes: object(3)\n", + "memory usage: 687.5+ KB\n" + ] + } + ], + "source": [ + "education.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamedatemanner_of_deatharmedagegenderracecitystatesigns_of_mental_illnessthreat_levelfleebody_camera
03Tim Elliot02/01/15shotgun53.0MASheltonWATrueattackNot fleeingFalse
14Lewis Lee Lembke02/01/15shotgun47.0MWAlohaORFalseattackNot fleeingFalse
25John Paul Quintero03/01/15shot and Taseredunarmed23.0MHWichitaKSFalseotherNot fleeingFalse
38Matthew Hoffman04/01/15shottoy weapon32.0MWSan FranciscoCATrueattackNot fleeingFalse
49Michael Rodriguez04/01/15shotnail gun39.0MHEvansCOFalseattackNot fleeingFalse
\n", + "
" + ], + "text/plain": [ + " id name date manner_of_death armed age \\\n", + "0 3 Tim Elliot 02/01/15 shot gun 53.0 \n", + "1 4 Lewis Lee Lembke 02/01/15 shot gun 47.0 \n", + "2 5 John Paul Quintero 03/01/15 shot and Tasered unarmed 23.0 \n", + "3 8 Matthew Hoffman 04/01/15 shot toy weapon 32.0 \n", + "4 9 Michael Rodriguez 04/01/15 shot nail gun 39.0 \n", + "\n", + " gender race city state signs_of_mental_illness threat_level \\\n", + "0 M A Shelton WA True attack \n", + "1 M W Aloha OR False attack \n", + "2 M H Wichita KS False other \n", + "3 M W San Francisco CA True attack \n", + "4 M H Evans CO False attack \n", + "\n", + " flee body_camera \n", + "0 Not fleeing False \n", + "1 Not fleeing False \n", + "2 Not fleeing False \n", + "3 Not fleeing False \n", + "4 Not fleeing False " + ] + }, + "execution_count": 104, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "killings.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 2535 entries, 0 to 2534\n", + "Data columns (total 14 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 id 2535 non-null int64 \n", + " 1 name 2535 non-null object \n", + " 2 date 2535 non-null object \n", + " 3 manner_of_death 2535 non-null object \n", + " 4 armed 2526 non-null object \n", + " 5 age 2458 non-null float64\n", + " 6 gender 2535 non-null object \n", + " 7 race 2340 non-null object \n", + " 8 city 2535 non-null object \n", + " 9 state 2535 non-null object \n", + " 10 signs_of_mental_illness 2535 non-null bool \n", + " 11 threat_level 2535 non-null object \n", + " 12 flee 2470 non-null object \n", + " 13 body_camera 2535 non-null bool \n", + "dtypes: bool(2), float64(1), int64(1), object(10)\n", + "memory usage: 242.7+ KB\n" + ] + } + ], + "source": [ + "killings.info()" + ] + }, + { + "cell_type": "code", + 
"execution_count": 106, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Geographic areaCityshare_whiteshare_blackshare_native_americanshare_asianshare_hispanic
0ALAbanda CDP67.230.2001.6
1ALAbbeville city54.441.40.113.1
2ALAdamsville city52.344.90.50.32.3
3ALAddison town99.10.100.10.4
4ALAkron town13.286.5000.3
\n", + "
" + ], + "text/plain": [ + " Geographic area City share_white share_black \\\n", + "0 AL Abanda CDP 67.2 30.2 \n", + "1 AL Abbeville city 54.4 41.4 \n", + "2 AL Adamsville city 52.3 44.9 \n", + "3 AL Addison town 99.1 0.1 \n", + "4 AL Akron town 13.2 86.5 \n", + "\n", + " share_native_american share_asian share_hispanic \n", + "0 0 0 1.6 \n", + "1 0.1 1 3.1 \n", + "2 0.5 0.3 2.3 \n", + "3 0 0.1 0.4 \n", + "4 0 0 0.3 " + ] + }, + "execution_count": 106, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "city_race.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 29268 entries, 0 to 29267\n", + "Data columns (total 7 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Geographic area 29268 non-null object\n", + " 1 City 29268 non-null object\n", + " 2 share_white 29268 non-null object\n", + " 3 share_black 29268 non-null object\n", + " 4 share_native_american 29268 non-null object\n", + " 5 share_asian 29268 non-null object\n", + " 6 share_hispanic 29268 non-null object\n", + "dtypes: object(7)\n", + "memory usage: 1.6+ MB\n" + ] + } + ], + "source": [ + "city_race.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- I will concatenate the `income`, `poverty`, `education` and `city_race` dataframes for compact analysis." + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [], + "source": [ + "data = pd.concat([poverty, education, income, city_race], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Geographic AreaCitypoverty_rateGeographic AreaCitypercent_completed_hsGeographic AreaCityMedian IncomeGeographic areaCityshare_whiteshare_blackshare_native_americanshare_asianshare_hispanic
26119TXTimpson city42.7TXTimpson city70.8TXTomball city44086TXVenus town79.413.20.51.724.8
20016OHPainesville city23OHPainesville city78.1OHParma city50440OHPleasant Run CDP79.814.50.51.82.2
15984NJEast Rutherford borough10.1NJEast Rutherford borough92.9NJEllisburg CDP61544NJFarmingdale borough89.62.90.53.26.9
14734MTCamas CDP48.6MTCamas CDP87.5MTCharlo CDP44583MTConrad city95.10.21.80.31.5
18451NCMarshville town28.9NCMarshville town71.1NCMaysville town24432NCMorganton city70.112.20.92.416.4
28940WIRichfield village2.8WIRichfield village95.8WIRiver Hills village156250WISpooner city95.10.31.90.71.3
1442ARBlue Eye town74.4ARBlue Eye town16.7ARBlue Eye town(X)ARBooneville city93.510.90.63.2
25763TXPoint Comfort city7.3TXPoint Comfort city89.2TXPortland city62561TXRamos CDP76.7000100
7761INLittle York town20.1INLittle York town77.8INLogansport city32982INLowell town95.90.50.40.36.9
1592ARGreers Ferry city13ARGreers Ferry city81.1ARGreers Ferry city31810ARHackett city920.13.40.60.6
\n", + "
" + ], + "text/plain": [ + " Geographic Area City poverty_rate Geographic Area \\\n", + "26119 TX Timpson city 42.7 TX \n", + "20016 OH Painesville city 23 OH \n", + "15984 NJ East Rutherford borough 10.1 NJ \n", + "14734 MT Camas CDP 48.6 MT \n", + "18451 NC Marshville town 28.9 NC \n", + "28940 WI Richfield village 2.8 WI \n", + "1442 AR Blue Eye town 74.4 AR \n", + "25763 TX Point Comfort city 7.3 TX \n", + "7761 IN Little York town 20.1 IN \n", + "1592 AR Greers Ferry city 13 AR \n", + "\n", + " City percent_completed_hs Geographic Area \\\n", + "26119 Timpson city 70.8 TX \n", + "20016 Painesville city 78.1 OH \n", + "15984 East Rutherford borough 92.9 NJ \n", + "14734 Camas CDP 87.5 MT \n", + "18451 Marshville town 71.1 NC \n", + "28940 Richfield village 95.8 WI \n", + "1442 Blue Eye town 16.7 AR \n", + "25763 Point Comfort city 89.2 TX \n", + "7761 Little York town 77.8 IN \n", + "1592 Greers Ferry city 81.1 AR \n", + "\n", + " City Median Income Geographic area City \\\n", + "26119 Tomball city 44086 TX Venus town \n", + "20016 Parma city 50440 OH Pleasant Run CDP \n", + "15984 Ellisburg CDP 61544 NJ Farmingdale borough \n", + "14734 Charlo CDP 44583 MT Conrad city \n", + "18451 Maysville town 24432 NC Morganton city \n", + "28940 River Hills village 156250 WI Spooner city \n", + "1442 Blue Eye town (X) AR Booneville city \n", + "25763 Portland city 62561 TX Ramos CDP \n", + "7761 Logansport city 32982 IN Lowell town \n", + "1592 Greers Ferry city 31810 AR Hackett city \n", + "\n", + " share_white share_black share_native_american share_asian share_hispanic \n", + "26119 79.4 13.2 0.5 1.7 24.8 \n", + "20016 79.8 14.5 0.5 1.8 2.2 \n", + "15984 89.6 2.9 0.5 3.2 6.9 \n", + "14734 95.1 0.2 1.8 0.3 1.5 \n", + "18451 70.1 12.2 0.9 2.4 16.4 \n", + "28940 95.1 0.3 1.9 0.7 1.3 \n", + "1442 93.5 1 0.9 0.6 3.2 \n", + "25763 76.7 0 0 0 100 \n", + "7761 95.9 0.5 0.4 0.3 6.9 \n", + "1592 92 0.1 3.4 0.6 0.6 " + ] + }, + "execution_count": 109, + "metadata": {}, + 
"output_type": "execute_result" + } + ], + "source": [ + "data.sample(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": {}, + "outputs": [], + "source": [ + "#rename the columns\n", + "data.columns = ['state', 'city', 'poverty_rate', 'Geographic_Area_x', 'City_x',\n", + " 'education', 'Geographic_Area_y', 'City_y', 'income',\n", + " 'Geographic_area_z', 'City_z', 'share_white', 'share_black',\n", + " 'share_native_american', 'share_asian', 'share_hispanic']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- The cities are not the same, but the areas(state) are. In this case I will perform my analysis based on geographic area." + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "metadata": {}, + "outputs": [], + "source": [ + "data.drop(['Geographic_Area_x', 'City_x', 'Geographic_Area_y', 'City_y', 'Geographic_area_z', 'City_z'], \n", + " axis=1, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
statecitypoverty_rateeducationincomeshare_whiteshare_blackshare_native_americanshare_asianshare_hispanic
18204NCDellview town01002361679.317.10.302.1
10302KYWhite Plains city14.482.35733494.11.6000
20058OHPlumwood CDP6.280.35342148.544.90.14.31.8
20849OKMulhall town28.689.4-88.2010.600
23114PAUtica borough20.584.34546998.60.30.10.41
21753PACentre Hall borough694.95375098.20.300.40.9
11271MDNanticoke Acres CDP52.6505766565.3230.45.55.9
12766MNHoffman city21.887.74500094.71.10.50.72.8
21915PAEagles Mere borough2.51005682795.71.20.21.50.8
4443FLHastings town26.381.836196970.70.40.44.3
2124CACamarillo city6.492.28815296.90006.2
14244MOMaplewood city19.691.52531396.60001.7
6547ILGrand Ridge village10.894.83906383.73.30.36.88.8
14122MOIronton city36.380.62375096.90000.6
4150FLArcher city35.590.6211465838100
5427GAMineral Bluff CDP251003309765.632.1003.7
5649GAWarwick city38.564.14300039.157.70.212.5
5573GASocial Circle city12.479.62432154.440.10.223
10038KYIndependence city8.489.82187598.40.300.30.3
28846WINiagara city20.393.28303698.300.401
\n", + "
" + ], + "text/plain": [ + " state city poverty_rate education income share_white \\\n", + "18204 NC Dellview town 0 100 23616 79.3 \n", + "10302 KY White Plains city 14.4 82.3 57334 94.1 \n", + "20058 OH Plumwood CDP 6.2 80.3 53421 48.5 \n", + "20849 OK Mulhall town 28.6 89.4 - 88.2 \n", + "23114 PA Utica borough 20.5 84.3 45469 98.6 \n", + "21753 PA Centre Hall borough 6 94.9 53750 98.2 \n", + "11271 MD Nanticoke Acres CDP 52.6 50 57665 65.3 \n", + "12766 MN Hoffman city 21.8 87.7 45000 94.7 \n", + "21915 PA Eagles Mere borough 2.5 100 56827 95.7 \n", + "4443 FL Hastings town 26.3 81.8 36196 97 \n", + "2124 CA Camarillo city 6.4 92.2 88152 96.9 \n", + "14244 MO Maplewood city 19.6 91.5 25313 96.6 \n", + "6547 IL Grand Ridge village 10.8 94.8 39063 83.7 \n", + "14122 MO Ironton city 36.3 80.6 23750 96.9 \n", + "4150 FL Archer city 35.5 90.6 21146 58 \n", + "5427 GA Mineral Bluff CDP 25 100 33097 65.6 \n", + "5649 GA Warwick city 38.5 64.1 43000 39.1 \n", + "5573 GA Social Circle city 12.4 79.6 24321 54.4 \n", + "10038 KY Independence city 8.4 89.8 21875 98.4 \n", + "28846 WI Niagara city 20.3 93.2 83036 98.3 \n", + "\n", + " share_black share_native_american share_asian share_hispanic \n", + "18204 17.1 0.3 0 2.1 \n", + "10302 1.6 0 0 0 \n", + "20058 44.9 0.1 4.3 1.8 \n", + "20849 0 10.6 0 0 \n", + "23114 0.3 0.1 0.4 1 \n", + "21753 0.3 0 0.4 0.9 \n", + "11271 23 0.4 5.5 5.9 \n", + "12766 1.1 0.5 0.7 2.8 \n", + "21915 1.2 0.2 1.5 0.8 \n", + "4443 0.7 0.4 0.4 4.3 \n", + "2124 0 0 0 6.2 \n", + "14244 0 0 0 1.7 \n", + "6547 3.3 0.3 6.8 8.8 \n", + "14122 0 0 0 0.6 \n", + "4150 38 1 0 0 \n", + "5427 32.1 0 0 3.7 \n", + "5649 57.7 0.2 1 2.5 \n", + "5573 40.1 0.2 2 3 \n", + "10038 0.3 0 0.3 0.3 \n", + "28846 0 0.4 0 1 " + ] + }, + "execution_count": 112, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.sample(20)" + ] + }, + { + "cell_type": "code", + "execution_count": 113, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + 
"name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 29329 entries, 0 to 29328\n", + "Data columns (total 10 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 state 29329 non-null object\n", + " 1 city 29329 non-null object\n", + " 2 poverty_rate 29329 non-null object\n", + " 3 education 29329 non-null object\n", + " 4 income 29271 non-null object\n", + " 5 share_white 29268 non-null object\n", + " 6 share_black 29268 non-null object\n", + " 7 share_native_american 29268 non-null object\n", + " 8 share_asian 29268 non-null object\n", + " 9 share_hispanic 29268 non-null object\n", + "dtypes: object(10)\n", + "memory usage: 2.2+ MB\n" + ] + } + ], + "source": [ + "data.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 114, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamedatemanner_of_deatharmedagegenderracecitystatesigns_of_mental_illnessthreat_levelfleebody_camera
03Tim Elliot02/01/15shotgun53.0MASheltonWATrueattackNot fleeingFalse
14Lewis Lee Lembke02/01/15shotgun47.0MWAlohaORFalseattackNot fleeingFalse
25John Paul Quintero03/01/15shot and Taseredunarmed23.0MHWichitaKSFalseotherNot fleeingFalse
38Matthew Hoffman04/01/15shottoy weapon32.0MWSan FranciscoCATrueattackNot fleeingFalse
49Michael Rodriguez04/01/15shotnail gun39.0MHEvansCOFalseattackNot fleeingFalse
.............................................
25302822Rodney E. Jacobs28/07/17shotgun31.0MNaNKansas CityMOFalseattackNot fleeingFalse
25312813TK TK28/07/17shotvehicleNaNMNaNAlbuquerqueNMFalseattackCarFalse
25322818Dennis W. Robinson29/07/17shotgun48.0MNaNMelbaIDFalseattackCarFalse
25332817Isaiah Tucker31/07/17shotvehicle28.0MBOshkoshWIFalseattackCarTrue
25342815Dwayne Jeune31/07/17shotknife32.0MBBrooklynNYTrueattackNot fleeingFalse
\n", + "

2535 rows × 14 columns

\n", + "
" + ], + "text/plain": [ + " id name date manner_of_death armed age \\\n", + "0 3 Tim Elliot 02/01/15 shot gun 53.0 \n", + "1 4 Lewis Lee Lembke 02/01/15 shot gun 47.0 \n", + "2 5 John Paul Quintero 03/01/15 shot and Tasered unarmed 23.0 \n", + "3 8 Matthew Hoffman 04/01/15 shot toy weapon 32.0 \n", + "4 9 Michael Rodriguez 04/01/15 shot nail gun 39.0 \n", + "... ... ... ... ... ... ... \n", + "2530 2822 Rodney E. Jacobs 28/07/17 shot gun 31.0 \n", + "2531 2813 TK TK 28/07/17 shot vehicle NaN \n", + "2532 2818 Dennis W. Robinson 29/07/17 shot gun 48.0 \n", + "2533 2817 Isaiah Tucker 31/07/17 shot vehicle 28.0 \n", + "2534 2815 Dwayne Jeune 31/07/17 shot knife 32.0 \n", + "\n", + " gender race city state signs_of_mental_illness threat_level \\\n", + "0 M A Shelton WA True attack \n", + "1 M W Aloha OR False attack \n", + "2 M H Wichita KS False other \n", + "3 M W San Francisco CA True attack \n", + "4 M H Evans CO False attack \n", + "... ... ... ... ... ... ... \n", + "2530 M NaN Kansas City MO False attack \n", + "2531 M NaN Albuquerque NM False attack \n", + "2532 M NaN Melba ID False attack \n", + "2533 M B Oshkosh WI False attack \n", + "2534 M B Brooklyn NY True attack \n", + "\n", + " flee body_camera \n", + "0 Not fleeing False \n", + "1 Not fleeing False \n", + "2 Not fleeing False \n", + "3 Not fleeing False \n", + "4 Not fleeing False \n", + "... ... ... 
\n", + "2530 Not fleeing False \n", + "2531 Car False \n", + "2532 Car False \n", + "2533 Car True \n", + "2534 Not fleeing False \n", + "\n", + "[2535 rows x 14 columns]" + ] + }, + "execution_count": 114, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "killings" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- Data Cleaning and Preprocessing\n", + " - filling in missing values and changing data types" + ] + }, + { + "cell_type": "code", + "execution_count": 115, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "state 0\n", + "city 0\n", + "poverty_rate 0\n", + "education 0\n", + "income 58\n", + "share_white 61\n", + "share_black 61\n", + "share_native_american 61\n", + "share_asian 61\n", + "share_hispanic 61\n", + "dtype: int64" + ] + }, + "execution_count": 115, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#check for missing values\n", + "data.isnull().sum()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- These are blocks of code that clean the object columns and convert them to float data types."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 116,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index(['state', 'city', 'poverty_rate', 'education', 'income', 'share_white',\n",
+       " 'share_black', 'share_native_american', 'share_asian',\n",
+       " 'share_hispanic'],\n",
+       " dtype='object')"
+      ]
+     },
+     "execution_count": 116,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 117,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def to_float(column):\n",
+    "    \"\"\"Convert a column of messy numeric strings to floats.\n",
+    "\n",
+    "    Each value is stringified and stripped of every character except digits\n",
+    "    and the decimal point, so '67.2' stays 67.2 (removing the '.' as well\n",
+    "    would turn it into 672). Entries with nothing parseable left, such as\n",
+    "    '(X)', '-' or NaN, become 0.\n",
+    "    \"\"\"\n",
+    "    cleaned = column.apply(lambda x: re.sub(r\"[^0-9.]\", \"\", str(x)))\n",
+    "    #errors='coerce' turns empty or malformed leftovers into NaN, which is then zero-filled\n",
+    "    return pd.to_numeric(cleaned, errors='coerce').fillna(0).astype('float')\n",
+    "\n",
+    "#income column\n",
+    "#pop + re-assign moves 'income' to the last column, as the original drop/re-add did\n",
+    "data['income'] = to_float(data.pop('income'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 118,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#poverty column\n",
+    "data['poverty_rate'] = to_float(data['poverty_rate'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 119,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#education column\n",
+    "data['education'] = to_float(data['education'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 120,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#share_white column\n",
+    "data['share_white'] = to_float(data['share_white'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 121,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#share_black column\n",
+    "data['share_black'] = to_float(data['share_black'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 122,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#share_native_american column\n",
+    "data['share_native_american'] = to_float(data['share_native_american'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 123,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#share_asian column\n",
+    "data['share_asian'] = to_float(data['share_asian'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 124,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#share_hispanic column\n",
+    "data['share_hispanic'] = to_float(data['share_hispanic'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 125,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "id 0\n",
+       "name 0\n",
+       "date 0\n",
+       "manner_of_death 0\n",
+       "armed 9\n",
+       "age 77\n",
+       "gender 0\n",
+       "race 195\n",
+       "city 0\n",
+       "state 0\n",
+       "signs_of_mental_illness 0\n",
+       "threat_level 0\n",
+       "flee 65\n",
+       "body_camera 0\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 125,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "killings.isnull().sum()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "- I will fill in the missing values in various columns and also categorize some columns in the killings dataframe."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 126,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "age_median = killings['age'].median()\n",
+    "#assign the result back: fillna(inplace=True) on a selected column acts on a temporary\n",
+    "#Series and leaves killings unchanged when pandas copy-on-write is active\n",
+    "killings['age'] = killings['age'].fillna(age_median)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 127,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#fill missing race with the most frequent value\n",
+    "top_race = killings['race'].describe().top\n",
+    "killings['race'] = killings['race'].fillna(top_race)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 128,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#fill missing flee with the most frequent value\n",
+    "top_flee = killings['flee'].describe().top\n",
+    "killings['flee'] = killings['flee'].fillna(top_flee)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 129,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#fill missing armed with the most frequent value\n",
+    "top_armed = killings['armed'].describe().top\n",
+    "killings['armed'] = killings['armed'].fillna(top_armed)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 130,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#relabel the raw threat levels as high / medium / low\n",
+    "killings.loc[killings['threat_level'] == 'attack', 'threat_level'] = 'high'\n",
+    "killings.loc[killings['threat_level'] == 'other', 'threat_level'] = 'medium'\n",
+    "killings.loc[killings['threat_level'] == 'undetermined', 'threat_level'] = 'low'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 131,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "killings['threat_level'] = killings['threat_level'].astype('category')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 132,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "killings['manner_of_death'] = killings['manner_of_death'].astype('category')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 133,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "killings['gender'] = killings['gender'].astype('category')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 134,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#dates are day/month/two-digit-year (e.g. 28/07/17); an explicit format stops pandas from\n",
+    "#guessing month-first, which would misread 02/01/15 or coerce day-first rows to NaT\n",
+    "killings['date'] = pd.to_datetime(killings['date'], format='%d/%m/%y', errors='coerce')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Exploratory Data Analysis"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 135,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
poverty_rateeducationshare_whiteshare_blackshare_native_americanshare_asianshare_hispanicincome
count29329.00000029329.00000029329.00000029329.00000029329.00000029329.00000029329.00000029329.000000
mean146.864400739.397456724.94394662.46315925.73289214.07334082.03614247991.033619
std127.664304291.350599326.169477150.394184118.51309240.698589161.61449527783.222116
min0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
25%48.000000736.000000605.0000001.0000001.0000000.0000008.00000033333.000000
50%121.000000858.000000894.0000007.0000003.0000004.00000025.00000043750.000000
75%212.000000921.000000962.00000035.0000008.00000011.00000071.00000057969.000000
max986.000000999.000000999.000000995.000000997.000000671.000000999.000000250000.000000
\n", + "
" + ], + "text/plain": [ + " poverty_rate education share_white share_black \\\n", + "count 29329.000000 29329.000000 29329.000000 29329.000000 \n", + "mean 146.864400 739.397456 724.943946 62.463159 \n", + "std 127.664304 291.350599 326.169477 150.394184 \n", + "min 0.000000 0.000000 0.000000 0.000000 \n", + "25% 48.000000 736.000000 605.000000 1.000000 \n", + "50% 121.000000 858.000000 894.000000 7.000000 \n", + "75% 212.000000 921.000000 962.000000 35.000000 \n", + "max 986.000000 999.000000 999.000000 995.000000 \n", + "\n", + " share_native_american share_asian share_hispanic income \n", + "count 29329.000000 29329.000000 29329.000000 29329.000000 \n", + "mean 25.732892 14.073340 82.036142 47991.033619 \n", + "std 118.513092 40.698589 161.614495 27783.222116 \n", + "min 0.000000 0.000000 0.000000 0.000000 \n", + "25% 1.000000 0.000000 8.000000 33333.000000 \n", + "50% 3.000000 4.000000 25.000000 43750.000000 \n", + "75% 8.000000 11.000000 71.000000 57969.000000 \n", + "max 997.000000 671.000000 999.000000 250000.000000 " + ] + }, + "execution_count": 135, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 136, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamedatemanner_of_deatharmedagegenderracecitystatesigns_of_mental_illnessthreat_levelfleebody_camera
count2535.00000025352535253525352535.00000025352535253525352535253525352535
uniqueNaN2481879268NaN261417512342
topNaNTK TK2017-01-24 00:00:00shotgunNaNMWLos AngelesCAFalsehighNot fleeingFalse
freqNaN49823631407NaN24281396394241902161117602264
firstNaNNaN2015-01-03 00:00:00NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
lastNaNNaN2017-12-07 00:00:00NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
mean1445.731755NaNNaNNaNNaN36.526233NaNNaNNaNNaNNaNNaNNaNNaN
std794.259490NaNNaNNaNNaN12.839056NaNNaNNaNNaNNaNNaNNaNNaN
min3.000000NaNNaNNaNNaN6.000000NaNNaNNaNNaNNaNNaNNaNNaN
25%768.500000NaNNaNNaNNaN27.000000NaNNaNNaNNaNNaNNaNNaNNaN
50%1453.000000NaNNaNNaNNaN34.000000NaNNaNNaNNaNNaNNaNNaNNaN
75%2126.500000NaNNaNNaNNaN45.000000NaNNaNNaNNaNNaNNaNNaNNaN
max2822.000000NaNNaNNaNNaN91.000000NaNNaNNaNNaNNaNNaNNaNNaN
\n", + "
" + ], + "text/plain": [ + " id name date manner_of_death armed \\\n", + "count 2535.000000 2535 2535 2535 2535 \n", + "unique NaN 2481 879 2 68 \n", + "top NaN TK TK 2017-01-24 00:00:00 shot gun \n", + "freq NaN 49 8 2363 1407 \n", + "first NaN NaN 2015-01-03 00:00:00 NaN NaN \n", + "last NaN NaN 2017-12-07 00:00:00 NaN NaN \n", + "mean 1445.731755 NaN NaN NaN NaN \n", + "std 794.259490 NaN NaN NaN NaN \n", + "min 3.000000 NaN NaN NaN NaN \n", + "25% 768.500000 NaN NaN NaN NaN \n", + "50% 1453.000000 NaN NaN NaN NaN \n", + "75% 2126.500000 NaN NaN NaN NaN \n", + "max 2822.000000 NaN NaN NaN NaN \n", + "\n", + " age gender race city state signs_of_mental_illness \\\n", + "count 2535.000000 2535 2535 2535 2535 2535 \n", + "unique NaN 2 6 1417 51 2 \n", + "top NaN M W Los Angeles CA False \n", + "freq NaN 2428 1396 39 424 1902 \n", + "first NaN NaN NaN NaN NaN NaN \n", + "last NaN NaN NaN NaN NaN NaN \n", + "mean 36.526233 NaN NaN NaN NaN NaN \n", + "std 12.839056 NaN NaN NaN NaN NaN \n", + "min 6.000000 NaN NaN NaN NaN NaN \n", + "25% 27.000000 NaN NaN NaN NaN NaN \n", + "50% 34.000000 NaN NaN NaN NaN NaN \n", + "75% 45.000000 NaN NaN NaN NaN NaN \n", + "max 91.000000 NaN NaN NaN NaN NaN \n", + "\n", + " threat_level flee body_camera \n", + "count 2535 2535 2535 \n", + "unique 3 4 2 \n", + "top high Not fleeing False \n", + "freq 1611 1760 2264 \n", + "first NaN NaN NaN \n", + "last NaN NaN NaN \n", + "mean NaN NaN NaN \n", + "std NaN NaN NaN \n", + "min NaN NaN NaN \n", + "25% NaN NaN NaN \n", + "50% NaN NaN NaN \n", + "75% NaN NaN NaN \n", + "max NaN NaN NaN " + ] + }, + "execution_count": 136, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "killings.describe(include='all')" + ] + }, + { + "cell_type": "code", + "execution_count": 137, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 29329 entries, 0 to 29328\n", + "Data columns (total 10 columns):\n", + " # Column 
Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 state 29329 non-null object \n", + " 1 city 29329 non-null object \n", + " 2 poverty_rate 29329 non-null float64\n", + " 3 education 29329 non-null float64\n", + " 4 share_white 29329 non-null float64\n", + " 5 share_black 29329 non-null float64\n", + " 6 share_native_american 29329 non-null float64\n", + " 7 share_asian 29329 non-null float64\n", + " 8 share_hispanic 29329 non-null float64\n", + " 9 income 29329 non-null float64\n", + "dtypes: float64(8), object(2)\n", + "memory usage: 2.2+ MB\n" + ] + } + ], + "source": [ + "data.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 138, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
statecitypoverty_rateeducationshare_whiteshare_blackshare_native_americanshare_asianshare_hispanicincome
0ALAbanda CDP788.0212.0672.0302.00.00.016.011207.0
1ALAbbeville city291.0691.0544.0414.01.01.031.025615.0
2ALAdamsville city255.0789.0523.0449.05.03.023.042575.0
3ALAddison town307.0814.0991.01.00.01.04.037083.0
4ALAkron town42.0686.0132.0865.00.00.03.021667.0
\n", + "
" + ], + "text/plain": [ + " state city poverty_rate education share_white share_black \\\n", + "0 AL Abanda CDP 788.0 212.0 672.0 302.0 \n", + "1 AL Abbeville city 291.0 691.0 544.0 414.0 \n", + "2 AL Adamsville city 255.0 789.0 523.0 449.0 \n", + "3 AL Addison town 307.0 814.0 991.0 1.0 \n", + "4 AL Akron town 42.0 686.0 132.0 865.0 \n", + "\n", + " share_native_american share_asian share_hispanic income \n", + "0 0.0 0.0 16.0 11207.0 \n", + "1 1.0 1.0 31.0 25615.0 \n", + "2 5.0 3.0 23.0 42575.0 \n", + "3 0.0 1.0 4.0 37083.0 \n", + "4 0.0 0.0 3.0 21667.0 " + ] + }, + "execution_count": 138, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 139, + "metadata": {}, + "outputs": [], + "source": [ + "total_state_income = pd.pivot_table(data, values='income', index='state', aggfunc='sum')" + ] + }, + { + "cell_type": "code", + "execution_count": 140, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
income
state
AK14900484.0
AL22155211.0
AR18366199.0
AZ15810888.0
CA84771828.0
\n", + "
" + ], + "text/plain": [ + " income\n", + "state \n", + "AK 14900484.0\n", + "AL 22155211.0\n", + "AR 18366199.0\n", + "AZ 15810888.0\n", + "CA 84771828.0" + ] + }, + "execution_count": 140, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "total_state_income.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 141, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAA/UAAAF0CAYAAABiyBmtAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAA7bElEQVR4nO3de5xN9f7H8fe29+wRMmOSy0iOlIhKp1NxXFIkIZFrakTpIuoguUQaROMWJZciEuN+v8aJgzjlEjVxlEsdzGCQcStz378//GafwYxmrb22vZd5PR+PHg/29P3sz17W7L3e3+9aazs8Ho9HAAAAAADAdgoEugEAAAAAAGAOoR4AAAAAAJsi1AMAAAAAYFOEegAAAAAAbIpQDwAAAACATRHqAQAAAACwqaAK9T/88IOioqJy/fnGjRsVFRWlqKgoPffcc6pcubIOHDhwDTsEAAAAACB4uALdQJZJkyZp6dKluuGGG3L9f+rUqaM6depIkiZPnqy//vWvqlChwrVqEQAAAACAoBI0K/W33nqrxo4d6/37zz//7F2Vf/3113Xu3Dnvz44dO6YlS5aoa9eugWgVAAAAAICgEDSh/vHHH5fL9b8TB9555x29++67mj59uurUqaPJkyd7fzZ16lR16NBBbrc7EK0CAAAAABAUgub0+8sdOHBAAwcOlCSlpaWpfPnykqTMzEytX79e3bt3D2R7AAAAAAAEXNCG+vLly2vYsGGKjIzUd999pxMnTkiS9u7dq/Lly6tgwYIB7hAAAAAAgMAK2lAfHR2t3r17KyMjQ5I0ZMgQSdKvv/6qsmXLBrI1AAAAAACCgsPj8XgC3QQAAAAAADAuaG6UBwAAAAAAjAmKUL9v375AtwAAAAAAgO0ERahPT08PdAsAAAAAANhOUIR6AAAAAABgHKEeAAAAAACbItQDAAAAAGBThHoAAAAAAGyKUA8AAAAAgE0R6gEAAAAAsClCPQAAAAAANkWoBwAAAADApgj1AAAAAADYFKEeAAAAAACbItQDAAAAAGBThHoAAAAAAGyKUA8AAAAAgE0R6gEAAIDrSEaGJ6DjAVxbrkA3AAAAAMA6TqdDy+eeND2+SeviFnYDwN9YqQcAAAAAwKYI9QAAAAAA2BShHgAAAAAAmyLUAwAAAABgU4R6AAAAAABsilAPAAAAAIBNEeoBAAAAALApQj0AAAAAADZFqAcAAAAAwKYI9QAAAAAA2BSh3sY86akBGQsAAAAACA6uQDcA8xwutw6Pfd7U2LKvT7O4GwAAAADAtcZKPQAAAAAANkWoBwAAAADApgj1AAAAAADYFKEeAAAAAACbItQDAAAAAGBThHoAAAAAAGyKUA8AAICglpbhCchYALADvqceAAAAQS3E6dAbiw6bGvtR87IWdwMAwYWVegAAAAAAbIpQDwAAAACATRHqAQAAAACwKb9cU5+WlqY+ffooISFBBQoU0ODBg1WhQgV/PBUAAAAAAPmWX1bqN2zYoPT0dM2ePVtdunTRmDFj/PE0AAAAAADka34J9eXLl1dGRoYyMzN1/vx5uV
zcZB8AAAAAAKv5JW0XKlRICQkJeuKJJ5SUlKSJEyde9f9PSUnRnj17/NHKda1y5co+jWebAwAAO+CYxxhft5eU/7aZlW4vf7tCCoaYGpuWnKb9v+63uCNcL3L73fZLqP/8889Vq1Ytvfnmmzp69Kief/55LVu2TKGhoTn+/6GhoZa8+cAYtjkAAMgPOOYxjm3mm4QRR02NK/NWabY9DPNLqC9atKhCQi7OToWFhSk9PV0ZGRn+eCoAAAAAAPItv4T6Dh066O2331a7du2Ulpam7t27q1ChQv54KgAAAAAA8i2/hPrChQvrww8/9EdpAAAAAADw//xy93sAAAAAAOB/hHoAAAAAAGyKUA8AAAAAgE0R6gEAAAAAsClCPQAAAAAANkWoBwAAAADApgj1AAAAAADYFKEeAAAAAACbItQDAAAAAGBThHoAAAAAAGyKUA8AAAAAgE0R6gEAAAAAsClCPQAAAAAANkWoBwAAAADApgj1AAAAAADYFKEeAAAAAACbItQDAAAAAGBThHoAAAAAAGyKUA/AdtIzUgMyFgAAAAg2rkA3AABGuZxufTDzcVNje7RbbXE3AAAAQOCwUg8AAAAAgE0R6gEAAAAAsClCPQAAAAAANkWoBwAAAADApgj1AAAAAADYFKEeAAAAAACbItQDAAAAAGBThHoAAAAAAGyKUA8AAAAAgE0R6gEAAAAAsClCPQAAAAAANkWoBwAAAADApgj1AAAAAADYFKEeAAAAAACbItQDAAAAAGBThHoAAAAAAGyKUA8AAAAAgE0R6gEAAAAAsClCPQAAAAAANkWoBwAAAADApgj1AAAAAADYFKEeAAAAAACbItQDAAAAAGBThHoAAAAAAGyKUA8AAAAAgE0R6gEAAAAAsClCPQAAAAAANkWoBwAAAADApgj1AAAAAADYFKEeAAAAAACbItQDAAAAAGBThHoAAAAAAGyKUA8AAAAAgE0R6gEAAAAAsClCPQAAAAAANkWoBwAAAADApgj1AAAAAADYFKEeAAAAAACbItQDAAAAAGBThHoAAAAAAGzK5a/Cn3zyidatW6e0tDQ988wzatWqlb+eCgAAAACAfMkvoX7Lli3auXOnZs2apQsXLmjKlCn+eBoAAAAAAPI1v4T6TZs2qWLFiurSpYvOnz+vXr16XfX/T0lJ0Z49e/zRynWtcuXKPo1nm8Ou2PcBIH/hfd8YX7eXlP+2mZXYX+Evue1bfgn1SUlJOnLkiCZOnKj4+Hh17txZX375pRwOR47/f2hoqCVvPjCGbY78in0fAPIX3veNY5sFDtseRvkl1IeHh+u2226T2+3WbbfdptDQUJ06dUo33XSTP54OAAAAyJP0DI9czpwXmvw5FgD8xS+h/v7779cXX3yhjh076vjx47pw4YLCw8P98VQAAABAnrmcDsUsOmpqbJ/mpS3uBgB855dQ/8gjj2jbtm1q2bKlPB6PBgwYIKfT6Y+nAgAAAAAg3/LbV9r92c3xAAAAAACAbwoEugEAAAAAAGAOoR4AAAAAAJsi1AMAAAAAYFOEegAAAAAAbIpQDwAAAACATRHqAQAAAACwKUI9AAAAAAA2RagHAAAAAMCmCPUAAAAAANgUoR4AAAAAAJsi1AMAAAAAYFOEegAAAAAAbIpQDwAAAACATRHqAQAAAACwKUI9AAAAAAA2RagHAAAAAMCmCPUAAAAAANgUoR4AAAAAAJsi1AMAAAAAYFOEegAAAAAAbIpQDwAAAACATRHqAQAAAACwKUI9AAAAAAA2RagHAAAAAMCmCPUAAAAAANgUoR4AAAAAAJsi1AMAAAAAYFOEegAAAAAAbMqVl/9p7969io6O1rlz5/Tkk0/qjjvu0COPPOLv3gAAAAAAwFXkaaV+yJAhev/99xUeHq6WLVtq7Nix/u4LAAAAAAD8iTyffl+uXDk5HA5FRESocOHC/uwJAAAAAADkQZ5CfVhYmGbPnq0LFy5oxYoVKlq0qL/7AgAAAAAAfy
JPoX7o0KGKj49XsWLFtGvXLg0ZMsTffQEAAAAAgD+Rpxvl3XDDDWrSpIlSU1MlSYcOHVJ4eLg/+wIAAAAAAH8iT6H+5ZdfVmpqqsLCwuTxeORwOPTxxx/7uzcAAAAAAHAVeQr1KSkpmjFjhr97AQAAAAAABuQp1P/tb3/T119/rQoVKngfi4yM9FtTAAAAAADgz+Up1P/2228aOnSo9673DodDs2fP9mtjAAAAAADg6vIU6n/99VetWrXK370AAAAAAAAD8vSVdhUrVtT333+v1NRU738AAAAAACCw8rRSv23bNq1fv977d4fDobVr1/qrJwBBIj0jVS6n+5qPBQAAAJA3eQr1y5Ytk8fj0alTpxQeHi6n0+nvvgAEAZfTrSnTGpga+8LzayzuBgAAAMDl8nT6/ZYtW1S/fn29+OKLeuyxx7R582Z/9wUAAAAAAP5Enlbqx4wZo5kzZ6pkyZJKTExU165dVbNmTX/3BgCA7aVmpMvtzNPHraVjAQBA/pCnIwWn06mSJUtKkkqWLKnQ0FC/NgUAwPXC7XSp8YKJpsauaPGqxd0AAIDrTZ5CfZEiRTR9+nQ98MAD2rZtm8LCwvzdFwAAAAAA+BN5uqZ+xIgROnLkiEaPHq2jR49q6NCh/u4LCHoZ6ea/2tGXsQAAAACQJU8r9UlJSapSpYp69+6tkSNH6ty5c6zWI99zutxa+VkjU2MbvbjS4m4AAAAA5Ed5Wqnv1auXbr75ZknSww8/rH79+vm1KQAAAAAA8OfyFOol6aGHHpIkPfDAA8rMzPRbQwAAAAAAIG/ydPp90aJFNWfOHFWrVk1xcXEqXLiwv/sCAAAAAAB/Ik8r9TExMdq/f79GjBihAwcOcKM8AAAAAACCQJ5W6iMiIvTqq68qJSVFkpScnOzXpgAAAJA3qRmZcjvzfEWlZWMBAMEhT6E+OjpaGzduVIkSJeTxeORwODR79mx/9wYAAIA/4XYW0NMLvjE1dmGLGhZ3AwC41vIU6uPi4vTVV1+pQAFmcgEAAAAACBZ5Sum33nqr99R7AAAAAAAQHPK0Un/s2DE98sgjKleunBwOhyRx+j0AAAAAAAF21VA/b948tWrVSpGRkYqMjPQ+nhXsAQAAAABA4Fw11JcqVUqSVLt27WvSDAAAAAAAyLurhvqsMN+8eXPDhX/77Tc9/fTTmjJliipUqGCuOwAA4JWakS63M09Xzlk6FgAABC+/fLqnpaVpwIABKliwoD/KAwCQL7mdLjVZ8JmpsctbvGhxNwAAIBj45Tvqhg0bprZt26pEiRL+KA8AAAAAAOSHlfqFCxcqIiJCtWvX1qeffpqnMSkpKdqzZ4/VrVz3Kleu7NN4trlv8sP2D9bXGKx9ATmxcn9l30dO8sN+we+RMb6+RskerzNY5Yd9DIGR275leahfsGCBHA6HvvnmG+3Zs0e9e/fWhAkTdPPNN+c6JjQ01JI3HxjDNg+s/LD9g/U1BmtfQE6s3F/Z95GT/LBf8HtkXH55ncGIbQ+jLA/1sbGx3j9HRUUpOjr6qoEeAAAAAACY45dr6gEAAAAAgP/59bttpk+f7s/yAAAAAABIkjzpGXK4nAEbHyh8YS0AAAAAwPYcLqeOj11renyJ1+tZ2M21w+n3AAAAAADYFKEeAAAAAACbItQDAAAAAGBThHoAQMCkZqQFZCwAAMD1ghvlAQACxu0MUaNF75oau7L5QIu7AQAAsB9W6gEAAAAAsClCPQAAAAAANkWoBwAAAADApgj1AAAAAADYFKEeAAAAAACbItQDAAAAAGBThHoAAAAAAGyKUA8AAAAAgE0R6gEAAAAAsClCPQAAAABc5zzpmQEZ68/a/uzLTlyBbgAAAAAA4F8OVwEd+2C3qbGlelSxuJv/cbgKKPHDzabGlvxHTYu7sSdW6gEAAAAAsClCPQAAAAAANkWoBwAAAADApgj1AAAAAADYFKEeAAAAAACbItQDAAAAAGBThHoAAAAAAGyKUA8gX0
vPSA3IWAAAAMAKrkA3AACB5HK69d6cx02N7d9mtcXdIFikZqTL7TT3EenLWAAAAKM46gAA4DJup0uNF35oauyKp/9hcTf5S2pGhtxOZ8DGAwBgN4R6AAAQNNxOp56cv8D0+GUtW1jYDQAAwY9r6gEAAAAAsClCPQAAAAAANkWoBwAAAADApgj1AAAAAADYFKEeACyS5uP31vs6HgAAAPkPd78HAIuEON3qNb+h6fHDW35pYTcAAADID1ipBwAAAADApgj1AAAAAADYFKEeAAAAMCE9wxOQsQCQHdfUAwAAACa4nA59uvC4qbEvP13C4m4A5Fes1AMAAAAAYFOEegAAAAAAbIpQDwAAAACATRHqAQAAAACwKUI9AAAAAAA2RagHAAAAAMCmCPUAAAAAANgUoR4AAOBPpGZkBHQ8AAC5cQW6AQAAgGDndjr11PzVpscvafm4hd0AAPA/rNQDQSAjPTUgYwEAAADYGyv1QBBwutxaOLWhqbFPd/zS4m4AAAAA2AUr9ZAkZfqw2uvLWAAAAACAeazUQ5JUwOXWz+OeMjX2zi5LLO4GAAAAAJAXrNQDQBBKy/DtDBhfxwMAAMAeWKkHgCAU4nSr4yJz91mQpKnNudcCAABAfsBKPQAAAAAANkWoBwAAAADApq7bUO9JTw/IWAAAAAAArpXr9pp6h8ulExMnmRp786svWdwNAAAAAADWu25X6gEAAAAAuN4R6gEAAAAAsClCPQAAAAAANmX5NfVpaWl6++23lZCQoNTUVHXu3Fn16tWz+mkAAAAAAMj3LA/1S5cuVXh4uEaMGKGkpCQ1b96cUA8AAAAAgB9YHuobNmyoxx9/3Pt3p9P5p2NSUlK0Z88eS/uoXLmyT+Ot7scfrHyN+WF7WS0/bP/80Fcw1cpez8pawYx9LHC1glWw7vvB2pfEfpEX+e13Mpj31/zget0v7NDX9b7v5/b6LA/1hQsXliSdP39eb7zxhrp16/anY0JDQy35B7BSsPXjD1a+xvywvayWH7Z/fujL6tcYzL0Fo2B9jcH67xis28tqwfo66StwgvX3KJi3fTD3dr0L1m2fX/oK1td5NX65Ud7Ro0fVvn17PfXUU3ryySf98RQAAAAAAOR7lq/Unzx5Ui+88IIGDBigGjVqWF0eAAAAAAD8P8tX6idOnKizZ89q/PjxioqKUlRUlJKTk61+GgBAHqVmpAZ0PAAAAPzH8pX6/v37q3///laXBQCY5Ha69cSSZ0yPX/XULAu7AQAAgJX8ck399caTnh6QsQAAAAAAXI3lK/XXI4fLpeMTR5kaW+LVNy3uBgAAAICVMtM9KuByXPOxf8aTnimHy9w6rC9jYS+EegAAcF1KzciQ2+kM2HgA9lHA5dC+jxNNjb2ja0mLu/kfh6uAjo38xdTYUj1vs7gbBCtCPQAAuC65nU41nb/U9PilLZta2A0AAP7B+RgAAENSM9ICMhYAAABXYqUeAGCI2xmiJxZ3NzV2VbPRFncDANeHjAyPnE5z12X7MhaA/RHqAQAAgABzOh2as+CkqbFtWhS3uBsAdsLp9wAAAAAA2BSh/hrzpJu/ntSXsQAAAACA6w+n319jDleIjo7vZ2ps6deGWNwNAAAAAMDOWKkHAAAAYDuZ6Z6AjAWCTVCt1HvSM+RwOa/5WAAAAAD+lZnhUQEf7tJ/+fgCLod+mHTcVK17Xyphug8g2ARVqHe4nDoxYYapsTd3fs7ibgAAAABYpYDToW+nnTA9vvrzN1vYDXD94PR7AADgk9SMjICMBQAAQbZSDwAA7MftdKrJ/Nmmxi5v2dbibgAA8J2vl3dfy8vDCfUAAAAAAGTjcDl1/OOVpseX6NrIwm6ujtPvAQAAAACwKUI9AAAAJEmpGZkBGQsAMI/T7wEAACBJcjsLqOWCnabGzm9xn8XdAADygpV6AAAAAABsilAPAAAAAIBNEeoBAMiH+G55AACuD1xTDwBAPnTxu+Wnmxq7vGWUxd0AAACzWK
kHAAAAAMCmCPUAAAAAANgUoR4AAAAAAJsi1MNymempAR0PAAAAAPkFN8qD5Qq43No58UnT4+97dZmF3QAAAADA9YuVegAAAAAAbIpQDwAAAACATRHqgetMhg/3JPBlLAAAAIBrj2vqgeuM0+XWzM8fNzW2XYfVFnfzP+kZqXI53dd8LAAAABBInvQMOVxOv40n1AO4JlxOtybMMDfZ0Pk5/002AAAAAP7kcDl1fNxC0+NLdHn6qj/n9HsAAAAAAGyKUA8AAAAAgE0R6gEAAAAAsClCPQAAAAAANkWoBwAAAADApgj1AAAAAADYFKEeAAAAAACbItQDAAAAAGBThHoAwHUhNSM9IGMB5Cw1IzMgYwEgv3EFugEAAKzgdrrUaNEwU2NXNu9tcTcA3M4CarNgr6mxc1pUtLgbALh+sVIPAAAAAMgzT7r5s2l8GYucsVIPAAAAAMgzh6uAEsd8Z2psyW73W9wNWKkHAAC4xlIzMgIyFgBw/WGlHvlGRnqqnC53wMYDAJDF7XSq+YJ/mRq7qMUjFncDALAzQj3yDafLrbWTG5seX6/TCgu7AQAAAADfcfo9AAAAAAA2RagHAAAAAMCmCPUAAAAAANgUoR4AAAAAAJsi1AMAAAAAYFOEegAAAAAAbIpQDwAAAACATRHqAQAAAOQoI8MT0PEA/pwr0A0AAAAACE5Op0PrYk+YHv/oszdb2A2AnLBSDwAAAACATRHqAQAAAACwKb+cfp+Zmano6Gj9/PPPcrvdeu+991SuXDl/PBUAAAAAAPmWX1bqv/rqK6WmpmrOnDl68803FRMT44+nAQAAAAAgX/NLqP/uu+9Uu3ZtSVK1atW0a9cufzwNAAAAAAD5msPj8Vj+PRP9+vVTgwYN9PDDD0uS6tatq6+++kouV85n+3///fcKDQ21ug0AAAAAAK4LLpdLd9xxx5WP++PJihQpot9//93798zMzFwDvXRxNR8AAAAAABjjl9Pv//rXv2rjxo2SLq7CV6xY0R9PAwAAAABAvuaX0++z7n6/d+9eeTweDR06VBUqVLD6aQAAAAAAyNf8EuoBAAAAAID/+eX0ewAAAAAA4H+EegAAAAAAbIpQDwAAAACATdkq1KekpAS6hRxt2LDB8JjZs2crNTX1isdjY2OtaAkAAAAAkA8EZagfNGjQFY8dOHBALVu2tOw53nrrLcNjFi5cqFq1aql+/fr6z3/+o3Pnzukf//iHRo4cabjWsGHD9OyzzyoxMfGSx1evXm24Vm727t2rAQMGmBrr8Xi0detWLV68WFu2bJHZ+ykG630Yc+srISHB0ufZvn27JXW+//57S+r4w+X7sBGHDx9WXFycTzXyg2vxexSsk6awn//85z+BbsHvjh8/nuPjP/zwwzXuxD569epl2WfinDlzLKkjSUeOHMn1P6PS0tJyfPzUqVOmejt16pT++OOPSx6bOXOmqVqXS09P14oVKyyp5YvTp097//zbb7+Z3lZZgu244ueff87x8SVLlpiq58/jAbP7xIIFC/zQzf/4kmfyE1egG8jJqVOnNHr0aHXv3l2StHTpUo0YMcJUEM/Nr7/+anjM1KlTtWLFCp04cUIxMTE6fvy46tWrZyrUV61aVW3atFG7du00YsQI/fWvf5Xk+y9rRkaG1qxZo9jYWJ08eVKtWrUyXOPkyZN65ZVXVK5cOd1yyy1at26dYmJi9Mknn6hEiRKGaj3//PP64osvDPeQk7S0NI0dO1ZdunRRaGio1q9fr+3bt6tbt25yuYztytn7GjZsmHr37i1J6tu3r2X9SlJMTIzmz59vamxqaqqWLVum2NhYpaamavny5YZr1KpVK9efbdq0yVRfWb799lvFxsZqx44d2rx5s6Gx8fHx6tatm0JCQnTTTTfpyJEjuuGGGzR69GjD+1hu3nrrLY0YMcLQmJy21++//67k5GTt2bPHUK2UlBTNnj1b7du3V2JiooYOHSq3263evXvr5ptvNlTr2Wef1c
iRIxUZGWloXF4cPnxYsbGxWrp0qf7973/7VOv8+fNatGiRZs2apZUrVxoam5qaqtGjR2v16tVKTU1V4cKF1ahRI3Xp0sXw77d0MVjeddddVzz+1VdfqX79+oZqXe3zonz58oZqRUVFyeFw5PgzM+89CQkJWrRokRISEhQZGanmzZvrlltuMVxHkn766SdVqlRJaWlpmjt3rtxut1q0aKECBYzN/8fExFj2Pnr+/Hm9++67GjhwoIoUKaLly5dr7dq1Gjx4sIoUKWK43owZM7Ry5UqdPn1apUqVUqNGjUwtGPTs2dP7GrO/14waNcrwa+/bt2+uP3v//fcN93Z54HU4HIqIiFDdunUN/y49+uijl+yvWccoDodDa9euNVSrQYMGmjx5sgYNGqQWLVqoWbNmCgsLM1Qjy+bNm7VhwwYNHTpU4eHhpmpk6d69uxwOhzwejw4cOKDbb79dHo9HDodDs2fPNlzro48+uuR3ZuvWrerVq5fWr19vqNYnn3yi+fPnKyMjQ0OGDFG5cuXUvXt3FSlSRO3atTNUK7vjx49r9uzZmj9/vipXrqzGjRsbGp/bcYXD4dDXX39tqNbWrVvVu3dvLV68WGFhYfrpp5/Uv39/jRgxQn/7298M1bLyuOLjjz/O9Wddu3Y1VKtv376KiopS8+bNJUkXLlxQdHS0Dh48qKeeespQLcna4+osvu4TS5YsUYsWLSztyYo8c7Xj3KsdH+emQYMGatasmVq2bOnzsWqPHj00aNAgU59juQnKUD9q1Ch169ZN48eP17Fjx7R3717NnDlTZcuWDWhf4eHhCgsLU1hYmA4cOKDo6Gg9/PDDpmo5HA41adJE5cuXV48ePfTCCy+oTZs2pns7ceKE5syZoyVLlqhatWpKTU3Vl19+aapWTEyMevbsqRo1angf27hxo95//32NHj3adI++ev/99+VyubwHGNWqVdOmTZsUExOj/v37G6qVffJk9+7dOT5uBTP14uPjFRsbq1WrVsnj8Wj06NHeSR+j3nzzTVPjcvPHH394Q9uJEyf0zjvvaNSoUYbrxMTEqE+fPpd8aG/evFmDBg266oepEWYm7i7/AJg1a5amTJmiPn36GK713nvvqVChQsrMzNTAgQN1991364477lB0dLTGjRtnqFanTp304osv6rXXXtOTTz5puJecbNiwQTNmzNCOHTv08ssva/HixaZr7d+/XzNmzNCqVav0+OOPKyYmxnCNYcOG6eabb9aqVasUGhqq8+fPa/LkyRo2bJj69etnuF72YNmxY0dNnTpV0sXgbDTUDxgwwLIgPnDgwEv+/tNPP2no0KFq0qSJoTqSFBcXp379+unZZ59VtWrVdPDgQb366qsaMmSI7r33XkO1pk6dqpUrV2rWrFkaNmyYjhw5osjISA0dOtTw+6uV3n33Xd19990qXLiwJKlhw4ZKTExUdHS04Qn1sWPH6sSJExo6dKiKFy+uhIQETZkyRcePH9drr71mqFb29/Zjx47l+HheNWrUyPtnKxYwTpw4ccVju3bt0po1azR8+HBDtR599FHt2rVLf//739W0aVOfJhbr16+v+vXr6+TJk1q8eLE6dOig22+/XW3atDEc4D766COtWLFC7du3V69evUwdoGfJPgkSFRWl6dOnm65VpkwZ9enTx7udJ0yYoAULFpha/FmxYoVWrFihpKQk9ejRQydPntRLL71k+qzVrVu3asaMGdqzZ48KFCigOXPmqHTp0obr/Otf/9K6desUFham6tWrS7q4z7333nuGa40ZM0bTp0/3Tu7UrFlTU6ZMUb9+/QyfkWDlcUXx4sUv+fuFCxc0adIklSlTxnConz59uvr166dt27apdevW6tevnx577DENHTrUUB1/sGqfSE5O1n//+98c3/+MTnxbmWdmz56tG2+8McefmXnPmD17tpYsWaKXX35ZZcqUUevWrU3nwGrVqqlNmzYaOHCg4fe/3ARlqHc6nRo9erS6du2q5O
RkzZw50/BKQZacZmk8Ho/Onz9vuFb2g7rIyEjT/5BZPUhSlSpVNGvWLPXo0UO7d+9WRkaGqXoNGjRQVFSUFi1apCJFiqhTp06mezt27NglgV6S6tSpo/HjxxuutX///lyDpdEwuHv37ks+fMPDw9WvXz9Ts3fZZX8Tyu3A3Syj9Tp37qyzZ8+qWbNmWr58ubp162Y60EvSL7/84l2BWLFihZo0aeJdgTBq8ODB+vbbb1W/fn2NGzdOgwcPNhVEpItn41z+JlazZk1NmjTJVD2rJSYmql+/fipcuLDmzp2rYsWKGa5x5MgRffbZZ0pJSdF3332njz76SCEhIZoyZYrhWo8++qjuv/9+DR8+XOvXr/fO+EvGP5imTJmiRYsW6c4779QLL7ygzMxMvfLKK4Z7ki5eLhQbG6u0tDQ9/fTT+vXXX3O8fCovdu/efcmqWJEiRdStWzdFRUWZqpf99zo9PT3Hx/PK7Mp3Tm677TZvH59++qkWL16sDz74QA8++KDhWh9++KE++eQTb9CqVauW6tSpowEDBngnMfJq48aNmj17thwOh5YvX67Vq1crLCxMbdu2NdzXjh07ct0vjZ4hdPTo0Us+K1wul1588UVTk+CbNm265DPkzjvv1Pvvv6/27dsbDvW5MfPeWrt2be+fP/3000v+bkZuoaN9+/aGa/Xv31+ZmZnatGmTxo8frzNnzqh+/fp64okn5Ha7TfVXvHhxderUSVFRURo/frw6duyoH3/80XCdxo0bq1KlSmrTpo0KFizofdyXs9B8PQbo27ev3nvvPfXv31+JiYkqVKiQFi5cqKJFixquFRYWJrfbrZIlSyoxMVEffvihqlSpYqqvp59+Wrfddpvatm2r6tWr6+WXXzYV3qSLZ6Y4nU6dPHlSBw4cUJkyZdS/f39T79VOp/OK99fy5cubOua38rgi+/ve9u3b9c477+jZZ5/Vq6++arhW4cKFNWbMGL3yyit65plnNHDgQLVu3dpwnSxWHVc3b95cFSpUsGSf+PXXXzVgwIArPl8dDofhiW8r88zp06f1008/6cEHH1Tt2rVVs2ZNU7+LWSIiItSxY0d17NhRcXFxWrBggcaMGaPHHnvM8GdI+/btVbduXQ0cOFBVq1ZVs2bNvD8zOhGSJShDfdYbcqtWrTRkyBBNmzZNd9xxhyTjB7A5XRuSlJRk6trp06dPa/PmzcrMzNT58+cv+eAw2td9993n/XNERIR3ZnLnzp2G+5KkIUOGaP78+Xr++efVokWLXK/ryguzEyg5KVGihE9nIGQXGhp6xWMOh0M33HCD4VrZP7itCPJt2rS5oo7H49Evv/xiqI7H45HL5VJycrIyMzN97i37G//333+vHj16mK713XffqUqVKrr33ntVtmxZn3rL7RTQzMxMw7WsnLiTLp5G9vHHH+sf//iH6UkL6X/71Y4dO3T33XcrJCREkvlr18PCwnTPPffos88+u+Rg2kyob9y4sZ5++mndeeedpiYZsvTu3VvPP/+8OnTooGLFimnNmjWma2Vtn8uZ3c9y+x03U2/37t1KTk7Wk08+qfvuu8/nM3r++9//qnfv3rrzzjs1f/587yq0UampqVesnJYtWzbHm7D+mQIFCsjpdGr37t0qW7asd+XMzGu97777fFrtvLyvnOS2v1xNTiE063UbZfVniD9qZUlOTtakSZNM77cFChRQnTp1VKdOHZ0+fVrR0dEaPHiw4uLiTNXbvn27lixZou+++07169c3fV33/PnzNWHCBPXv3/+SA+JA69+/v959911lZGToww8/NF0n+75QunRp04Feku6++27t3LlTGzduVMmSJX3azw4dOqSFCxcqNTVVLVq0UEhIiKZNm6YKFSoYruXxeJSZmXnJ73lGRoapY1grjyuki5d8fvDBB/rmm280atSoHC/nyoukpCT16dNHbrdbU6ZM0ZAhQ+TxeEwfG1t1XH3vvfdqx44dluwTlStXtuySACvzzIwZM5
SamqqdO3dq69atmjt3riTpgQce8Hki95577vEepy9ZssRUvVtvvVUdOnRQ3759tXPnTu+im9ltGZShPvsb/EMPPaS9e/dqy5Yt2rx5s+HZ3OzXo8XFxWnGjBn68ccfTZ2+VKVKFe91zXfddZf3tCgzfWUPV3FxcYqNjdWmTZtMfzA1atRIjRo1Unx8vObPn6/Dhw+rW7dueuqpp/TII48YqhUZGal169bp0Ucf9T62fv16lSlTxnBfN954o6kVqJxEREToxx9/1N133+19LC4uzlSo3717t3cmdv/+/Wrbtq33ejoz6tSp4/23S0xMVMmSJU3VmThxoo4dO6b58+erVatW+uOPP7RhwwbVrl3b58kWXw8WFy9erB07dmjevHmKiYnxbi8zH+SnT5++Iox7PB6dOXPGcK3cDgizT5zl1euvv64dO3bozTffVHh4uE8Td4UKFdKcOXO0evVqNWnSRJmZmVqwYIGpmfDDhw/r7bffVrFixTRnzhxTZw5kWbdunVavXq0hQ4YoOTlZFy5c0Llz53I9Re1qVq9erUWLFunZZ59VxYoVlZSUZLov6eJB1OXBw5ebdGbVu/zPRi1dulR79+7V0qVL9emnn+qBBx5Q06ZNVa5cOcO1pk+frs8//1x9+/ZVnTp1JMkbwo2ufOZ0sOrxeEyFeuniasvChQu9nxn79u0z9b5jZTAtV67cFfdBWLt2reH7UlytLzP7RPazEU6fPu39s5n3MKtdfilNWlqaIiIiLvlMNyIzM1ObN2/WihUrtGfPHtWpU8fUvWLGjh2r5cuXq1y5cmrdurXeffddU/fLkKSXXnpJmZmZmjlzpunP2yzZz944fvz4JX83Gp6yxlaqVEkbNmzQe++9512UMlorMTFRc+bMkcfj8Z6SbLavgQMH6vjx4/rnP/+pd955R/v27dPMmTPVqFEjw/ckyLoG2O12KzMzU1OmTDF9X4OmTZuqR48eevXVV3XLLbfo2LFjGjdunJ544gnDtaw8rvjPf/6jvn37qnbt2po3b56pScQsrVu31gsvvKBnnnlG0sUbHb799tvavHmzPvroI8P1rDqurlq1qvr06aNVq1b5vE8cOnRIKSkpOS6+GVWhQgVNmTLFkjwjXdxPq1SpojNnzuj333/X7t27fbqZa0JCghYvXqyVK1eqQoUK3vcyo86dO6fBgwfr4MGDmj59uunV+ewcnmC9Pfn/ywrimzdvVoMGDQxvuNTUVK1YsUIzZ85USEiIzp8/r7lz515yqlYg+4qNjZXb7bakrzlz5qhFixZyuVzatm2b9uzZo2+++UYTJkwwVOfUqVN6/fXXdeONN+rWW29VfHy8fvvtN02YMEERERGGan322WcqUqSIt6/t27dr37593jc3I44dO6bXXntNpUuXVtmyZXX06FHFx8frww8/NHx6bEJCgs6dO6fPPvtMSUlJ+tvf/qaGDRsqJCTE1ORF+/btvTNr2f/si6NHj2r9+vX68ssvdfDgQcM32blaj746f/68li1b5j2wM3rnUytvDJXbwY2ZmxxZ2deRI0cUGxuryMhItWvXTt9++62++OILNW7c2PAZAJUrV9awYcPUtGnTSx7funWrTx/uBw8e1Lx587Ry5UpVrVrV1AFGlm+++UZz587VDz/8oMcff9x788m8uvyGXNkZvSHX5fUuv+TETL3stm3bpunTp+vYsWPemX8jfUk5B0yjfY0bN04XLlxQjx49VKBAAWVmZuqDDz6Qy+VSt27dDNWKi4vT4MGDVaZMGQ0ZMkS7d+9Wr169NGbMGFWrVs1QrTp16qhmzZo5/szo79HZs2fVo0cP/fbbb96D/mLFimn48OGGDzyrVq2a45gzZ84YnphftGhRjo87HA7Dk/PZJwxPnz59SY9mTiW//BRcj8ejhQsXqmDBglq3bp2hWgMHDtS2bdv04IMPqkmTJj5dDvbII4+oVatWKl269BX7v9FtNmPGDBUqVCjHSSejtbKut46Pj1
dCQoLKlCnjPaYwev10sNaKjY3VZ599JpfLpf79+6tMmTKaP3++Vq5cafhrma0+3lm5cqVmz56tEydOKDIyUk8++aSpBa7LP7/PnDkjp9OpIkWKGH7fqVq1qgoXLqy//OUvV3yOGD2u6Nq1a47X9E+dOlUdO3Y0VEu6eFx90003ef9esGBBValSxfB9x4YMGaL169erZs2aatOmjdxut+l9YuDAgfr6669Vq1YttWnTRpUrVzY0PruoqCgdO3ZMDzzwgGrXrq0aNWpo586dmjdvnuHLgKdOnar169fr3LlzqlGjhmrXrq3777/f9CTNQw89pGLFiqlly5Zq3rz5Jf8OZmp16NBBr776qmUT4UG5Up9TEF+7dq2pwPvoo4+qSZMmGjFihP7yl7+oU6dOpoOzP/oaOXKkz31JF2fA9+3bp6ZNm8rlcql06dKaNm2aqdO1IiIi1KRJE1WpUkUJCQl67LHHtG/fPsOBXrp4Y7UffvjB21epUqX0+eef69SpU+rSpYuhWqVKldLgwYM1bNgwrV+/Xk2bNlX37t1NXe8aFxenyZMnq23btoqIiNCRI0f0xhtv6I033jAV6rPPjfkyT7Z//34NGjRIX3zxhV544QUVLVpUx44dM33afI8ePbzX1F9+HZbRexpcfolB1qUCZmY8c7sswcwb2wcffGB4TG6s7OuVV17RtGnTvL83WR9MI0eONBzqy5Yte8WZAuPHj9fcuXN9muwpV66cevbsqW7duqlhw4aGx19+EFWwYEFVrFhR8+bNMxzqH3roIcPPfy3rSRcns/75z39q+fLlunDhwhWTLNe6r1deeUUfffSR6tWrp7CwMJ05c0YNGzY09X4xa9Ys3X777ZIu3uQxJSVF999/v+bMmWM41IeFhWn79u1q2rSpz5crrFu3To0bN1ZCQoJ3xaVUqVJav3694QP/wYMH5/i4md/vy98rPB6PFi1apNDQUMN95XZ9rNkDvez1Dh48qD59+qhu3bp6++23DdeaNWuWwsPDtWbNmisurzE64dCkSRNduHDBu+0yMzO1aNEiFSxY0PA2u/zryrJPXBit1bFjR7355ptKSkrSLbfcon379unUqVOmPluCtdayZcv05Zdf6vz58+rVq5cmT56s3r17m3qvyDqWsOK4Yvfu3fr000+9n2XR0dGKj49X0aJFDZ9Z8txzz+ntt9/W/Pnz9a9//UvR0dG68cYb1atXL0N1JKlevXqmxuXk7NmzOT5uJtBLFyf+sn8N4B9//KHx48erffv2hs5C7tevn3r16qW1a9dq9OjROnv2rFq0aGH4m2ukizc0TUtLu6JWkyZNDJ9NO3369BxPmTdzM7lx48apdu3aeuWVV/TAAw/4dMaFdPEsgsTERP3yyy/aunWrT9fop6amauHChTp8+LAl1/tLQRrqrQzi7du31/Lly5WQkKCWLVv6dHARrH1JF29yNHfuXO9BwC233KLRo0erbdu2hmd0syYImjVrpnvuuUfx8fGaNm2akpKSDAfxq/VltNaqVas0efJktWnTxvt1JVlB3OjdrL/44gtNnz5dhQoV8j7WvHlzde7c2XAtybrrK0eOHOm983Hx4sU1ffp0HTx4UP379zf11SfZb/Zi5oZX2VkZnq2sZWYSJjdW9tWlSxe99NJLmjZtmtLS0tSzZ0+53W4tXLjQcK0ePXpYVisnLpfL1FdL7dq1S8nJyVeEt2effdZwrcuvW5d8myCzst6qVau0YsUKHTlyRA0aNNDAgQNN3zzPyr7eeecdSVL16tX122+/qUKFCjp9+rT69etneGVq165dSklJseS+AcuWLbPscoXsl0SlpKTI4/Fo+PDhpsKbv4J4VnB++OGHTQVnK/vKLjY2VtOmTVPfvn1NnbYq5X5mhZnPOSsnG6ysNWrUKDVs2PCSbT1v3jwNHz7c8I0/g7WW2+2W2+1WRETEJdcomwk5Y8aM8f7Z1+OK0aNHKyYmRm63W2PGjNGkSZNUrlw5de
rUyXCoHz16tIYNG6aQkJAratWrV89QraSkJMuOLQ4fPpzrsYWZSZWcJgFTUlIUFRVl+NLikJAQNWzYUA0bNtTx48f1xRdf6NFHH9WWLVsM95VTrbp165qqldMp80a/Vli6eAbh9u3btXHjRn3wwQe6+eabVadOHT388MOmvslj5syZuU44GM00O3futKxWlqAM9VYG3pdfflkvv/yytm7dqnnz5mnXrl0aMWKEnnrqKVWsWPG66Eu6eP3u5R+yISEhl4TWvLIyiN9www059mXmxlBWBnGXy3XFtilSpIipGyZJ/7tGP2vmOuvPRk/VunDhgveeAVnXOJcrV+6Su3cbYdX9DCRrw7OVtaxkZV8NGzZURkaGOnbsqLNnz6p9+/amwq7VtXJj5iDdyvBm5XXrVtfr3r27brvtNlWqVEl79+695Ks9ja5MWdlX9iDeuHHjoAniklSxYkX17NlT0sXLFUaNGmXqcgV/B0GzQTyLFcHZ6r4SExPVt29fhYWFad68eaa/C17yz4SDFdvMylo//fSTBgwYcMljrVq1MnXfgGCtlZ2vi0hWHld4PB5VqlRJiYmJunDhgvfsUjP38sitlpnPNiuDeMGCBS25XvpqQkNDTa9Cp6Sk6J///KcWL16s33//3aev1LSi1uWnzNetW1dvvvmmqdcXEhKiGjVqeL/Ra+PGjfrkk080aNAgU5MEknUTDlbXkoI01FsdeKWLb0IPPvigzp49qyVLlqhXr16Gv5c5WPuSLr5pHD58+JJrag4fPmzqjTG3CQIzQfyGG27IsS8zb7JWBvHcnt/sXVKXLl1qatzlst8ZPfu1Q2ZvJoTAaty4sdLT0zVv3jyfv3rRqlpZl2Rk5/F4dPjwYVP1rApvVteysp5V96Kwuq9gDeJZrLhcIUuwBUErg7OVfUkXT3MPCQlR9erVr1jRNToJZeWEg5XbzMpauX2+mjm2CNZaVp4yb6WsY66vv/7aG7xSU1P1+++/W1brjz/+MFzLyiBevHjxS76G1h9OnDihCxcuGBqzZcsWLV68WFu2bPFebmA2y1hZy8pT5n/88Ud999132r59u3755RdVqlRJzZo104gRI0zVs3LCwcpaWYI6KVgVeLMrWrSooqKiTH/3cbD21bNnT7322muqUaOGypYtqyNHjmjTpk0aNmyY4Vq5TRCYCeJW9mVlEM/pez59ufu9VSu8JUqUUFxcnO655x7vY3Fxcabu9IzAyn4/g0OHDqldu3besGX0IMrKWrmdLunLaZRWhjcra1lVz8qVKSv7koIziFt5uUKwBkErg7PVEwTjxo3zaXxOrJhwsHKbWVkrPDz8im/W+fHHH039OwRrLStPmbdSjRo11LZtWx07dkwTJkzQoUOHFB0drUaNGgW0lpVBvGrVqpbUyXL5xHxKSor27Nlz1Rv95mTs2LFq06aNBg4caPibV/xZy8pT5keOHKlatWqpc+fOuuuuu3y+IZ2VEw5WX+8v2eDu98i7c+fOae3atTp+/LgiIyNVt25d71ePGLFv3z716NEjxyBu5ns6rerr73//u3f2NYvH4/F+3aERW7duzfVn/jiAz6vDhw/rtddeU/Xq1VWuXDkdPnxY33zzjSZOnGjq+h8EjpX7WLDur5eHtyZNmpgOb1bW8kc9q/ijr8uDeKNGjfTcc88FrK9KlSp5L1eQLp2QNRq4sg52qlevfsUBWSBrWfk7aWVfVss+4RAdHe3ThEOwvifGx8erc+fOeuihh1S2bFnFx8d7vznI6B3Fg7VWMDtw4IAiIiJUrFgxHTp0SD///LMee+yxgNYaNmyY4Zu9XiuX7/sFCxbUbbfdZuq42g6yTpnfsWOHT6emWyEtLc074bBt2zafJhysrJWFUI8cWRXErRSswcZqycnJWrduneLj41W6dGnVq1fP1L0RAH+zMrxZWcsf9axiZV/BGsSDNbwF62dIsPYlBfeEg5VSUlK0fv16HT58WCVLlvTpczdYawF2kNMp8z
Vq1FDNmjWDbnHLygkHK2oR6gEApgRz4ArWoGRlX8EaxHH9YL8AcC09//zzqlWrlv7+979bcsq8layccPDH5AWhHgAAGyJwAQBwbVg54eCPyQtCPQAAAAAANmX8+84AAAAAAEBQINQDAAAAAGBThHoAAPKxGTNmXPXnP//8s7Zt23aNugEAAEYR6gEAyMcmTJhw1Z+vWbNG+/fvv0bdAAAAo1yBbgAAAFwbv/76q/r27SuXyyWn06nq1avrzJkzio6OVs+ePdWvXz+dO3dOSUlJatWqlerVq6dFixYpJCREVapUUXJyskaPHi2n06myZctq0KBBCgkJCfTLAgAgX+Pu9wAA5BOxsbH65Zdf1KdPH23fvl033XSTOnbsqM2bN2v37t1KSEhQgwYNlJiYqKioKK1Zs0Zjx45V8eLF1bZtWzVs2FAzZ87UTTfdpDFjxigyMlKtW7cO9MsCACBfY6UeAIB8omXLlpo0aZI6deqkG2+8Ud27d/f+rHjx4po2bZrWrFmjIkWKKD09/ZKxp06d0vHjx9WtWzdJUnJysmrWrHkt2wcAADkg1AMAkE+sXbtW999/v7p27arly5dr8uTJyjphb8qUKapWrZratWunb7/9Vhs2bJAkORwOZWZmqlixYipVqpTGjx+vG2+8UWvXrlWhQoUC+XIAAIA4/R4AgHzj0KFDeuutt+R0OlWgQAH17dtXMTExKlmypFq2bKno6GgVK1ZM4eHh2rdvn1auXKl///vfGj58uAYMGKD09HSNGzdOHo9HhQsX1vDhw3XTTTcF+mUBAJCvEeoBAAAAALApvtIOAAAAAACbItQDAAAAAGBThHoAAAAAAGyKUA8AAAAAgE0R6gEAAAAAsClCPQAAAAAANkWoBwAAAADApv4PBYRJUZxZwJYAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.catplot(x=total_state_income.index, y='income', data=total_state_income, kind='bar', height=5, aspect=14/5)\n", + "plt.xticks(rotation=90)\n", + "\n", + "# Show plot\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 142, + "metadata": {}, + "outputs": [], + "source": [ + "avg_state_poverty = pd.pivot_table(data, values='poverty_rate', index='state', aggfunc='mean') " + ] + }, + { + "cell_type": "code", + "execution_count": 143, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
poverty_rate
state
AK164.602817
AL187.502564
AR205.609982
AZ221.889135
CA148.695795
\n", + "
" + ], + "text/plain": [ + " poverty_rate\n", + "state \n", + "AK 164.602817\n", + "AL 187.502564\n", + "AR 205.609982\n", + "AZ 221.889135\n", + "CA 148.695795" + ] + }, + "execution_count": 143, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "avg_state_poverty.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 145, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.catplot(x=avg_state_poverty.index, y='poverty_rate', data=avg_state_poverty, kind='bar', height=5, aspect=14/5)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 146, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 29329 entries, 0 to 29328\n", + "Data columns (total 10 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 state 29329 non-null object \n", + " 1 city 29329 non-null object \n", + " 2 poverty_rate 29329 non-null float64\n", + " 3 education 29329 non-null float64\n", + " 4 share_white 29329 non-null float64\n", + " 5 share_black 29329 non-null float64\n", + " 6 share_native_american 29329 non-null float64\n", + " 7 share_asian 29329 non-null float64\n", + " 8 share_hispanic 29329 non-null float64\n", + " 9 income 29329 non-null float64\n", + "dtypes: float64(8), object(2)\n", + "memory usage: 2.2+ MB\n" + ] + } + ], + "source": [ + "data.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Areas of exploration:\n", + "1. Find the correlation between poverty, education and income in the states\n", + " - which states are poor|rich, more educated, higher income\n", + "2. Top 5 states:\n", + " - with the most killings\n", + " - share of race\n", + " - level of education\n", + " - poverty|income levels\n", + " - e.g. is the race of most killings related to the share of race?\n", + "\n", + "3. Describe the average profile of a person being killed by police:\n", + " - age\n", + " - gender\n", + " - race\n", + " - state\n", + " - poverty|income levels\n", + " - education\n", + " - share of race\n", + "4. Are these killings justified?\n", + " - what is the correlation between manner of death and threat_level|flee\n", + " - did the threat_level justify the manner of death?" 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Exploring the correlation between income, education and poverty rates in the various states." + ] + }, + { + "cell_type": "code", + "execution_count": 147, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
statecitypoverty_rateeducationshare_whiteshare_blackshare_native_americanshare_asianshare_hispanicincome
0ALAbanda CDP788.0212.0672.0302.00.00.016.011207.0
1ALAbbeville city291.0691.0544.0414.01.01.031.025615.0
2ALAdamsville city255.0789.0523.0449.05.03.023.042575.0
3ALAddison town307.0814.0991.01.00.01.04.037083.0
4ALAkron town42.0686.0132.0865.00.00.03.021667.0
\n", + "
" + ], + "text/plain": [ + " state city poverty_rate education share_white share_black \\\n", + "0 AL Abanda CDP 788.0 212.0 672.0 302.0 \n", + "1 AL Abbeville city 291.0 691.0 544.0 414.0 \n", + "2 AL Adamsville city 255.0 789.0 523.0 449.0 \n", + "3 AL Addison town 307.0 814.0 991.0 1.0 \n", + "4 AL Akron town 42.0 686.0 132.0 865.0 \n", + "\n", + " share_native_american share_asian share_hispanic income \n", + "0 0.0 0.0 16.0 11207.0 \n", + "1 1.0 1.0 31.0 25615.0 \n", + "2 5.0 3.0 23.0 42575.0 \n", + "3 0.0 1.0 4.0 37083.0 \n", + "4 0.0 0.0 3.0 21667.0 " + ] + }, + "execution_count": 147, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- I will create pivot tables that aggregate the mean of the values in the columns for each state, then concatenate them into one dataframe." + ] + }, + { + "cell_type": "code", + "execution_count": 148, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "state_race = data.pivot_table(index='state', \n", + " values=['share_white', 'share_black', 'share_native_american', 'share_asian', 'share_hispanic'],\n", + " aggfunc='mean')" + ] + }, + { + "cell_type": "code", + "execution_count": 149, + "metadata": {}, + "outputs": [], + "source": [ + "state_poverty = data.pivot_table(index='state', \n", + " values=['poverty_rate'],\n", + " aggfunc='mean')" + ] + }, + { + "cell_type": "code", + "execution_count": 150, + "metadata": {}, + "outputs": [], + "source": [ + "state_educ = data.pivot_table(index='state', \n", + " values=['education'],\n", + " aggfunc='mean')" + ] + }, + { + "cell_type": "code", + "execution_count": 151, + "metadata": {}, + "outputs": [], + "source": [ + "state_income = data.pivot_table(index='state', \n", + " values=['income'],\n", + " aggfunc='mean')" + ] + }, + { + "cell_type": "code", + "execution_count": 152, + "metadata": {}, + "outputs": [], + "source": [ + "state_data = 
pd.concat([state_race, state_poverty, state_income, state_educ], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 153, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
share_asianshare_blackshare_hispanicshare_native_americanshare_whitepoverty_rateincomeeducation
state
AK10.6225354.55211322.411268412.892958378.030986164.60281741973.194366634.670423
AL6.104274213.40341926.18461512.299145653.859829187.50256437872.155556724.249573
AR5.005545148.27356741.6303147.114603699.589649205.60998233948.611830727.378928
AZ6.50332610.960089182.332594229.121951557.541020221.88913535057.401330643.731707
CA50.93823924.346912267.30814715.869908634.827201148.69579555697.653088684.221419
\n", + "
" + ], + "text/plain": [ + " share_asian share_black share_hispanic share_native_american \\\n", + "state \n", + "AK 10.622535 4.552113 22.411268 412.892958 \n", + "AL 6.104274 213.403419 26.184615 12.299145 \n", + "AR 5.005545 148.273567 41.630314 7.114603 \n", + "AZ 6.503326 10.960089 182.332594 229.121951 \n", + "CA 50.938239 24.346912 267.308147 15.869908 \n", + "\n", + " share_white poverty_rate income education \n", + "state \n", + "AK 378.030986 164.602817 41973.194366 634.670423 \n", + "AL 653.859829 187.502564 37872.155556 724.249573 \n", + "AR 699.589649 205.609982 33948.611830 727.378928 \n", + "AZ 557.541020 221.889135 35057.401330 643.731707 \n", + "CA 634.827201 148.695795 55697.653088 684.221419 " + ] + }, + "execution_count": 153, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "state_data.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- Let's look at the correlation between the economic attributes of the states:" + ] + }, + { + "cell_type": "code", + "execution_count": 154, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
poverty_rateincomeeducation
poverty_rate1.000000-0.626781-0.477182
income-0.6267811.0000000.450140
education-0.4771820.4501401.000000
\n", + "
" + ], + "text/plain": [ + " poverty_rate income education\n", + "poverty_rate 1.000000 -0.626781 -0.477182\n", + "income -0.626781 1.000000 0.450140\n", + "education -0.477182 0.450140 1.000000" + ] + }, + "execution_count": 154, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "state_data[['poverty_rate', 'income', 'education']].corr(method='pearson')" + ] + }, + { + "cell_type": "code", + "execution_count": 155, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "pd.plotting.scatter_matrix(state_data[['poverty_rate', 'income', 'education']], figsize=(10, 10));" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Income and education have a positive but weak correlation. This means that high levels of education tend to go with high levels of income.**\n", + "\n", + "**Income and poverty have a moderate negative correlation, meaning that high levels of poverty relate to low levels of income.**\n", + "\n", + "**There is a weak negative correlation between education and poverty. This means that low education levels relate to high poverty levels.**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- I will look at the top and bottom five states in terms of poverty, income and education." + ] + }, + { + "cell_type": "code", + "execution_count": 156, + "metadata": {}, + "outputs": [], + "source": [ + "#poor|rich states\n", + "poor_states = state_data['poverty_rate'].sort_values(ascending=False).head(5)\n", + "rich_states = state_data['poverty_rate'].sort_values(ascending=True).head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 157, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(state\n", + " MS 246.044199\n", + " AZ 221.889135\n", + " GA 215.406699\n", + " AR 205.609982\n", + " LA 203.341772\n", + " Name: poverty_rate, dtype: float64,\n", + " state\n", + " DC 18.000000\n", + " NJ 76.143119\n", + " CT 77.375000\n", + " WY 78.549020\n", + " MA 89.170732\n", + " Name: poverty_rate, dtype: float64)" + ] + }, + "execution_count": 157, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "poor_states, rich_states" + ] + }, + { + "cell_type": "code", + "execution_count": 158, + "metadata": {}, + "outputs": [], + "source": [ + "high_inc = state_data['income'].sort_values(ascending=False).head(5)\n", + "low_inc = 
state_data['income'].sort_values(ascending=True).head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 159, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(state\n", + " NJ 78832.957798\n", + " CT 74141.520833\n", + " MD 71692.177606\n", + " MA 69822.195122\n", + " NY 68863.528428\n", + " Name: income, dtype: float64,\n", + " state\n", + " NM 29773.024831\n", + " MS 33512.030387\n", + " DC 33564.000000\n", + " AR 33948.611830\n", + " WV 34913.782716\n", + " Name: income, dtype: float64)" + ] + }, + "execution_count": 159, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "high_inc, low_inc" + ] + }, + { + "cell_type": "code", + "execution_count": 160, + "metadata": {}, + "outputs": [], + "source": [ + "high_educ = state_data['education'].sort_values(ascending=False).head(5)\n", + "low_educ = state_data['education'].sort_values(ascending=True).head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 161, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(state\n", + " DC 893.000000\n", + " HI 832.735099\n", + " MA 826.004065\n", + " ME 821.261538\n", + " WI 816.635779\n", + " Name: education, dtype: float64,\n", + " state\n", + " WY 567.490196\n", + " NM 610.611738\n", + " NV 624.503817\n", + " AK 634.670423\n", + " AZ 643.731707\n", + " Name: education, dtype: float64)" + ] + }, + "execution_count": 161, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "high_educ, low_educ" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "*These states stand out from the above analysis:*\n", + "\n", + "**MA low poverty, high income, high education**\n", + "\n", + "**DC low poverty, low income and high education**\n", + "\n", + "**NJ, CT low poverty, high income**\n", + "\n", + "**WY low poverty, low education**\n", + "\n", + "**MS, AR high poverty and low income**\n", + "\n", + "**AZ high poverty and low education**" + ] + }, + { + 
"cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Top5 states" + ] + }, + { + "cell_type": "code", + "execution_count": 162, + "metadata": {}, + "outputs": [], + "source": [ + "top5_states = killings.groupby('state')['state'].count().sort_values(ascending=False).head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 163, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "state\n", + "CA 424\n", + "TX 225\n", + "FL 154\n", + "AZ 118\n", + "OH 79\n", + "Name: state, dtype: int64" + ] + }, + "execution_count": 163, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "top5_states" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**At this point, *AZ* stands out in the top5 states with most killings; and also has high poverty and low education levels.**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- I will select economic data of the top5 states with most killings:" + ] + }, + { + "cell_type": "code", + "execution_count": 164, + "metadata": {}, + "outputs": [], + "source": [ + "top5_data = state_data.query('@state_data.index in @top5_states.index')" + ] + }, + { + "cell_type": "code", + "execution_count": 165, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
share_asianshare_blackshare_hispanicshare_native_americanshare_whitepoverty_rateincomeeducation
state
AZ6.50332610.960089182.332594229.121951557.541020221.88913535057.401330643.731707
CA50.93823924.346912267.30814715.869908634.827201148.69579555697.653088684.221419
FL14.952070123.931373147.2657954.345316709.960784160.60566448552.166667749.193900
OH6.69547336.90946520.6444446.383539815.370370135.60000048856.190123792.947325
TX9.19805451.793360308.7813397.001717718.241557168.23010945645.395535653.486548
\n", + "
" + ], + "text/plain": [ + " share_asian share_black share_hispanic share_native_american \\\n", + "state \n", + "AZ 6.503326 10.960089 182.332594 229.121951 \n", + "CA 50.938239 24.346912 267.308147 15.869908 \n", + "FL 14.952070 123.931373 147.265795 4.345316 \n", + "OH 6.695473 36.909465 20.644444 6.383539 \n", + "TX 9.198054 51.793360 308.781339 7.001717 \n", + "\n", + " share_white poverty_rate income education \n", + "state \n", + "AZ 557.541020 221.889135 35057.401330 643.731707 \n", + "CA 634.827201 148.695795 55697.653088 684.221419 \n", + "FL 709.960784 160.605664 48552.166667 749.193900 \n", + "OH 815.370370 135.600000 48856.190123 792.947325 \n", + "TX 718.241557 168.230109 45645.395535 653.486548 " + ] + }, + "execution_count": 165, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "top5_data" + ] + }, + { + "cell_type": "code", + "execution_count": 166, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamedatemanner_of_deatharmedagegenderracecitystatesigns_of_mental_illnessthreat_levelfleebody_camera
03Tim Elliot2015-02-01shotgun53.0MASheltonWATruehighNot fleeingFalse
14Lewis Lee Lembke2015-02-01shotgun47.0MWAlohaORFalsehighNot fleeingFalse
25John Paul Quintero2015-03-01shot and Taseredunarmed23.0MHWichitaKSFalsemediumNot fleeingFalse
38Matthew Hoffman2015-04-01shottoy weapon32.0MWSan FranciscoCATruehighNot fleeingFalse
49Michael Rodriguez2015-04-01shotnail gun39.0MHEvansCOFalsehighNot fleeingFalse
\n", + "
" + ], + "text/plain": [ + " id name date manner_of_death armed age \\\n", + "0 3 Tim Elliot 2015-02-01 shot gun 53.0 \n", + "1 4 Lewis Lee Lembke 2015-02-01 shot gun 47.0 \n", + "2 5 John Paul Quintero 2015-03-01 shot and Tasered unarmed 23.0 \n", + "3 8 Matthew Hoffman 2015-04-01 shot toy weapon 32.0 \n", + "4 9 Michael Rodriguez 2015-04-01 shot nail gun 39.0 \n", + "\n", + " gender race city state signs_of_mental_illness threat_level \\\n", + "0 M A Shelton WA True high \n", + "1 M W Aloha OR False high \n", + "2 M H Wichita KS False medium \n", + "3 M W San Francisco CA True high \n", + "4 M H Evans CO False high \n", + "\n", + " flee body_camera \n", + "0 Not fleeing False \n", + "1 Not fleeing False \n", + "2 Not fleeing False \n", + "3 Not fleeing False \n", + "4 Not fleeing False " + ] + }, + "execution_count": 166, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "killings.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 167, + "metadata": {}, + "outputs": [], + "source": [ + "killings['race'] = killings['race'].astype('category')" + ] + }, + { + "cell_type": "code", + "execution_count": 168, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 2535 entries, 0 to 2534\n", + "Data columns (total 14 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 id 2535 non-null int64 \n", + " 1 name 2535 non-null object \n", + " 2 date 2535 non-null datetime64[ns]\n", + " 3 manner_of_death 2535 non-null category \n", + " 4 armed 2535 non-null object \n", + " 5 age 2535 non-null float64 \n", + " 6 gender 2535 non-null category \n", + " 7 race 2535 non-null category \n", + " 8 city 2535 non-null object \n", + " 9 state 2535 non-null object \n", + " 10 signs_of_mental_illness 2535 non-null bool \n", + " 11 threat_level 2535 non-null category \n", + " 12 flee 2535 non-null object \n", + " 13 body_camera 2535 non-null 
bool \n", + "dtypes: bool(2), category(4), datetime64[ns](1), float64(1), int64(1), object(5)\n", + "memory usage: 173.9+ KB\n" + ] + } + ], + "source": [ + "killings.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 169, + "metadata": {}, + "outputs": [], + "source": [ + "#group state killings by race\n", + "race_count = killings.groupby(['state', 'race'])['race'].count()\n", + "race_count.name = 'race_count'" + ] + }, + { + "cell_type": "code", + "execution_count": 170, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "state_race_count = race_count.reset_index(level='race')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- I will select the top5 states and their counts of killings." + ] + }, + { + "cell_type": "code", + "execution_count": 171, + "metadata": {}, + "outputs": [], + "source": [ + "top5_race = state_race_count.query('@state_race_count.index in @top5_states.index')" + ] + }, + { + "cell_type": "code", + "execution_count": 172, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
racerace_count
state
AZA0
AZB5
AZH37
AZN8
AZO0
AZW68
CAA15
CAB65
CAH169
CAN1
CAO8
CAW166
FLA1
FLB49
FLH18
FLN0
FLO2
FLW84
OHA2
OHB30
OHH0
OHN0
OHO2
OHW45
TXA2
TXB46
TXH66
TXN1
TXO3
TXW107
\n", + "
" + ], + "text/plain": [ + " race race_count\n", + "state \n", + "AZ A 0\n", + "AZ B 5\n", + "AZ H 37\n", + "AZ N 8\n", + "AZ O 0\n", + "AZ W 68\n", + "CA A 15\n", + "CA B 65\n", + "CA H 169\n", + "CA N 1\n", + "CA O 8\n", + "CA W 166\n", + "FL A 1\n", + "FL B 49\n", + "FL H 18\n", + "FL N 0\n", + "FL O 2\n", + "FL W 84\n", + "OH A 2\n", + "OH B 30\n", + "OH H 0\n", + "OH N 0\n", + "OH O 2\n", + "OH W 45\n", + "TX A 2\n", + "TX B 46\n", + "TX H 66\n", + "TX N 1\n", + "TX O 3\n", + "TX W 107" + ] + }, + "execution_count": 172, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "top5_race" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- I will create a dataframe that contains the percentage share of each race killed in each of the top5 states." + ] + }, + { + "cell_type": "code", + "execution_count": 254, + "metadata": {}, + "outputs": [], + "source": [ + "race_pivot = top5_race.pivot_table(index=top5_race.index, values='race_count', columns='race', aggfunc=['sum'])\n", + "race_pivot.columns = ['sum_asian', 'sum_black', 'sum_hispanic', 'sum_natives', 'sum_others', 'sum_whites']\n", + "race_pivot.drop('sum_others', axis=1, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 255, + "metadata": {}, + "outputs": [], + "source": [ + "race_share = top5_data[['share_asian', 'share_black', 'share_hispanic', 'share_native_american', 'share_white']]" + ] + }, + { + "cell_type": "code", + "execution_count": 256, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sum_asiansum_blacksum_hispanicsum_nativessum_whitesshare_asianshare_blackshare_hispanicshare_native_americanshare_white
state
AZ05378686.50332610.960089182.332594229.121951557.541020
CA1565169116650.938239240.000000267.30814715.869908634.827201
FL1491808414.952070123.931373147.2657954.345316709.960784
OH23000456.69547336.90946520.6444446.383539815.370370
TX2466611079.19805451.793360308.7813397.001717718.241557
\n", + "
" + ], + "text/plain": [ + " sum_asian sum_black sum_hispanic sum_natives sum_whites \\\n", + "state \n", + "AZ 0 5 37 8 68 \n", + "CA 15 65 169 1 166 \n", + "FL 1 49 18 0 84 \n", + "OH 2 30 0 0 45 \n", + "TX 2 46 66 1 107 \n", + "\n", + " share_asian share_black share_hispanic share_native_american \\\n", + "state \n", + "AZ 6.503326 10.960089 182.332594 229.121951 \n", + "CA 50.938239 240.000000 267.308147 15.869908 \n", + "FL 14.952070 123.931373 147.265795 4.345316 \n", + "OH 6.695473 36.909465 20.644444 6.383539 \n", + "TX 9.198054 51.793360 308.781339 7.001717 \n", + "\n", + " share_white \n", + "state \n", + "AZ 557.541020 \n", + "CA 634.827201 \n", + "FL 709.960784 \n", + "OH 815.370370 \n", + "TX 718.241557 " + ] + }, + "execution_count": 256, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "top5_race_data = pd.concat([race_pivot, race_share], axis=1)\n", + "top5_race_data['share_black']['CA'] = 240\n", + "top5_race_data" + ] + }, + { + "cell_type": "code", + "execution_count": 257, + "metadata": {}, + "outputs": [], + "source": [ + "top5_race_data['%_asian'] = round((top5_race_data['sum_asian'] / top5_race_data['share_asian']) * 100)\n", + "top5_race_data['%_black'] = round((top5_race_data['sum_black'] / top5_race_data['share_black']) * 100)\n", + "top5_race_data['%_hispanic'] = round((top5_race_data['sum_hispanic'] / top5_race_data['share_hispanic']) * 100)\n", + "top5_race_data['%_natives'] = round((top5_race_data['sum_natives'] / top5_race_data['share_native_american']) * 100)\n", + "top5_race_data['%_whites'] = round((top5_race_data['sum_whites'] / top5_race_data['share_white']) * 100)" + ] + }, + { + "cell_type": "code", + "execution_count": 258, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "top5_race_data.drop(['sum_asian', 'sum_black', 'sum_hispanic', 'sum_natives', 'sum_whites',\n", + " 'share_asian', 'share_black', 'share_hispanic', 'share_native_american',\n", + " 'share_white'], axis=1, 
inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 259, + "metadata": {}, + "outputs": [], + "source": [ + "top5_race_pc = top5_race_data.astype(np.int64)" + ] + }, + { + "cell_type": "code", + "execution_count": 260, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
%_asian%_black%_hispanic%_natives%_whites
state
AZ04620312
CA292763626
FL74012012
OH3081006
TX2289211415
\n", + "
" + ], + "text/plain": [ + " %_asian %_black %_hispanic %_natives %_whites\n", + "state \n", + "AZ 0 46 20 3 12\n", + "CA 29 27 63 6 26\n", + "FL 7 40 12 0 12\n", + "OH 30 81 0 0 6\n", + "TX 22 89 21 14 15" + ] + }, + "execution_count": 260, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "top5_race_pc" + ] + }, + { + "cell_type": "code", + "execution_count": 261, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['%_asian', '%_black', '%_hispanic', '%_natives', '%_whites'], dtype='object')" + ] + }, + "execution_count": 261, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "races = top5_race_pc.columns\n", + "races" + ] + }, + { + "cell_type": "code", + "execution_count": 262, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#a visualization of each race by state\n", + "for race in races:\n", + " values= top5_race_pc[race]\n", + " labels= top5_race_pc.index\n", + " plt.axis('equal')\n", + " plt.title(race)\n", + " plt.pie(values, labels=labels, radius=2, autopct='%0.0f%%')\n", + " plt.show();" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Blacks have a generally higher percentage of killings in each state compared to other races.**\n", + "\n", + "**TX has a large share of killings of Native Americans and Blacks.**\n", + "\n", + "**CA has a large share of killings of Whites, Hispanics and Asians.**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Profile of a person killed by police" + ] + }, + { + "cell_type": "code", + "execution_count": 263, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamedatemanner_of_deatharmedagegenderracecitystatesigns_of_mental_illnessthreat_levelfleebody_camera
03Tim Elliot2015-02-01shotgun53.0MASheltonWATruehighNot fleeingFalse
14Lewis Lee Lembke2015-02-01shotgun47.0MWAlohaORFalsehighNot fleeingFalse
25John Paul Quintero2015-03-01shot and Taseredunarmed23.0MHWichitaKSFalsemediumNot fleeingFalse
38Matthew Hoffman2015-04-01shottoy weapon32.0MWSan FranciscoCATruehighNot fleeingFalse
49Michael Rodriguez2015-04-01shotnail gun39.0MHEvansCOFalsehighNot fleeingFalse
\n", + "
" + ], + "text/plain": [ + " id name date manner_of_death armed age \\\n", + "0 3 Tim Elliot 2015-02-01 shot gun 53.0 \n", + "1 4 Lewis Lee Lembke 2015-02-01 shot gun 47.0 \n", + "2 5 John Paul Quintero 2015-03-01 shot and Tasered unarmed 23.0 \n", + "3 8 Matthew Hoffman 2015-04-01 shot toy weapon 32.0 \n", + "4 9 Michael Rodriguez 2015-04-01 shot nail gun 39.0 \n", + "\n", + " gender race city state signs_of_mental_illness threat_level \\\n", + "0 M A Shelton WA True high \n", + "1 M W Aloha OR False high \n", + "2 M H Wichita KS False medium \n", + "3 M W San Francisco CA True high \n", + "4 M H Evans CO False high \n", + "\n", + " flee body_camera \n", + "0 Not fleeing False \n", + "1 Not fleeing False \n", + "2 Not fleeing False \n", + "3 Not fleeing False \n", + "4 Not fleeing False " + ] + }, + "execution_count": 263, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "killings.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 264, + "metadata": {}, + "outputs": [], + "source": [ + "race_avg_age = killings.groupby('race')['age'].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 265, + "metadata": {}, + "outputs": [], + "source": [ + "race_avg_age.name = 'average_age'" + ] + }, + { + "cell_type": "code", + "execution_count": 266, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "race\n", + "A 36.538462\n", + "B 31.669903\n", + "H 33.018913\n", + "N 30.451613\n", + "O 33.071429\n", + "W 39.942693\n", + "Name: average_age, dtype: float64\n" + ] + } + ], + "source": [ + "print(race_avg_age)" + ] + }, + { + "cell_type": "code", + "execution_count": 267, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "34.11550197750503" + ] + }, + "execution_count": 267, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "race_avg_age.mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 268, + 
"metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "M 2428\n", + "F 107\n", + "Name: gender, dtype: int64" + ] + }, + "execution_count": 268, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "killings.gender.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 269, + "metadata": {}, + "outputs": [], + "source": [ + "males = len(killings[killings['gender'] == 'M'])\n", + "females = len(killings[killings['gender'] == 'F'])\n", + "total = killings.shape[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 270, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(2428, 107, 2535)" + ] + }, + "execution_count": 270, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "males, females, total" + ] + }, + { + "cell_type": "code", + "execution_count": 271, + "metadata": {}, + "outputs": [], + "source": [ + "males_pc = round((males / total) * 100)\n", + "females_pc = round((females / total) * 100)" + ] + }, + { + "cell_type": "code", + "execution_count": 272, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(96, 4)" + ] + }, + "execution_count": 272, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "males_pc, females_pc" + ] + }, + { + "cell_type": "code", + "execution_count": 273, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.axis('equal')\n", + "plt.title('Gender Percentage of Killings')\n", + "plt.pie([males_pc, females_pc], labels=['male', 'female'], radius=2, autopct='%0.0f%%')\n", + "plt.show();" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**The average profile of a victim killed by police is a Black male, aged 34 years and most likely living in TX.**\n", + "\n", + "**The average profile of a victim killed by police is a Hispanic male, aged 34 years and most likely living in CA.**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Justification of killings" + ] + }, + { + "cell_type": "code", + "execution_count": 274, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.swarmplot(data=killings,\n", + " x='age',\n", + " y='flee',\n", + " hue='manner_of_death')\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Victims of police shootings, typically aged between 20 and 45, were either not fleeing, or fleeing by car or on foot.**\n", + "\n", + "**Victims aged 50 and above were mostly shot while not fleeing.**" + ] + }, + { + "cell_type": "code", + "execution_count": 275, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.countplot(data=killings,\n", + " y=\"manner_of_death\",\n", + " hue='threat_level')\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**The majority of victims who were shot were at a high threat level.**\n", + "\n", + "**Victims who were shot and tasered were at both high and medium threat levels.**\n", + "\n", + "*It can be seen that the majority of the killings were justified, as the victims were at high and medium threat levels.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}