# Reconstructed from a whitespace-collapsed git diff that added three files:
#   explorer/lib/__init__.py   (empty)
#   explorer/lib/download.py
#   explorer/lib/figures.py
#
# NOTE: plotly is imported inside the figure helpers rather than here so that
# the download helpers -- which came from a module that never imported
# plotly -- remain importable without it.
import base64
import html
from io import BytesIO

import pandas as pd


# ---------------------------------------------------------------------------
# explorer/lib/download.py
# ---------------------------------------------------------------------------

_CSV_MIME = "text/csv"
_XLSX_MIME = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"


def _with_extension(name, extension):
    """Return *name* as a string ending in *extension* (case-insensitive check)."""
    name = str(name)
    if name.lower().endswith(extension):
        return name
    return f"{name}{extension}"


def _download_anchor(payload_b64, mime, filename, label):
    """Return an HTML ``<a>`` element embedding *payload_b64* as a data URI."""
    # The file name ends up inside a double-quoted HTML attribute, so quotes
    # and angle brackets in it must be escaped.
    safe_name = html.escape(filename, quote=True)
    return (
        f'<a href="data:{mime};base64,{payload_b64}" '
        f'download="{safe_name}">{label}</a>'
    )


def get_csv_download_link(df: pd.DataFrame, name) -> str:
    """Return an HTML link that downloads *df* as a CSV file.

    The frame is serialised without its index, base64-encoded and embedded in
    the link as a ``data:`` URI, so no file is written anywhere.

    Args:
        df: Dataframe to export.
        name: File name offered to the browser; ``.csv`` is appended when it
            is not already the suffix.

    Returns:
        An ``<a>`` element (as a string) labelled "Download as CSV file".
    """
    csv = df.to_csv(index=False)
    # str -> bytes for base64, then bytes -> str for embedding in the markup.
    b64 = base64.b64encode(csv.encode("utf-8")).decode("ascii")
    return _download_anchor(
        b64, _CSV_MIME, _with_extension(name, ".csv"), "Download as CSV file"
    )


def to_excel(df: pd.DataFrame) -> bytes:
    """Serialise *df* into an in-memory ``.xlsx`` workbook and return its bytes.

    The data is written to a sheet called ``"data"`` using the ``xlsxwriter``
    engine. ``index`` is left at the pandas default, so the index is written.
    """
    output = BytesIO()
    # ``ExcelWriter.save()`` no longer exists in pandas 2.x; leaving the
    # ``with`` block closes the writer, which flushes the workbook to *output*.
    with pd.ExcelWriter(output, engine="xlsxwriter") as writer:
        df.to_excel(writer, sheet_name="data")
    return output.getvalue()


def get_excel_download_link(df: pd.DataFrame, name) -> str:
    """Return an HTML link that downloads *df* as an Excel workbook.

    Args:
        df: Dataframe to export (serialised with :func:`to_excel`).
        name: File name offered to the browser; ``.xlsx`` is appended when it
            is not already the suffix.

    Returns:
        An ``<a>`` element (as a string) labelled "Download as Excel file".
    """
    val = to_excel(df)
    # b64encode returns bytes; decode so the f-string does not render b'...'.
    b64 = base64.b64encode(val).decode("ascii")
    return _download_anchor(
        b64, _XLSX_MIME, _with_extension(name, ".xlsx"), "Download as Excel file"
    )


# ---------------------------------------------------------------------------
# explorer/lib/figures.py
# ---------------------------------------------------------------------------

# Rank positions read by generate_ranking_plot: columns "<input_col>[1]"
# through "<input_col>[6]".
_RANK_POSITIONS = range(1, 7)


def generate_pie_chart(
    df,
    values,
    names,
    hover_name,
    color,
    color_discrete_sequence,
    color_discrete_map,
):
    """Build an 800px-wide plotly express pie chart from *df*.

    All arguments are forwarded unchanged to ``plotly.express.pie``. Each
    slice is labelled with both its percentage and its value.
    """
    import plotly.express as px

    fig = px.pie(
        df,
        values=values,
        names=names,
        hover_name=hover_name,
        color=color,
        color_discrete_sequence=color_discrete_sequence,
        color_discrete_map=color_discrete_map,
        width=800,
    )
    fig.update_traces(textinfo="percent+value")
    return fig


def generate_histogram(df, x, y, nbins, color, color_discrete_map, labels):
    """Build a plotly express histogram; arguments go to ``px.histogram`` as-is."""
    import plotly.express as px

    return px.histogram(
        df,
        x=x,
        y=y,
        nbins=nbins,
        color=color,
        color_discrete_map=color_discrete_map,
        labels=labels,
    )


def generate_stacked_bar_chart(data):
    """Wrap *data* in an 800x800 ``go.Figure`` whose bars are stacked.

    *data* is passed straight to ``plotly.graph_objects.Figure(data=...)``.
    """
    import plotly.graph_objects as go

    fig = go.Figure(data=data)
    fig.update_layout(width=800, height=800, barmode="stack")
    return fig


def _ranking_table(df, input_col, options, scoring):
    """Return one row per option with its weighted score and rank-1 count.

    The result has columns ``institution``, ``score`` and ``ranked_first`` and
    is sorted ascending by ``score``, ties broken by ``ranked_first``.
    """
    # Start from explicit float zeros: a Series built with only an index has
    # no defined dtype and holds NaN.
    score = pd.Series(0.0, index=options)
    for rank in _RANK_POSITIONS:
        counts = df[f"{input_col}[{rank}]"].value_counts()
        # fill_value=0 keeps labels that appear on only one side of the add.
        score = score.add(counts.multiply(scoring[rank]), fill_value=0)

    first_counts = df[f"{input_col}[1]"].value_counts()
    ranked_first = pd.DataFrame(
        {"institution": first_counts.index, "ranked_first": first_counts.values}
    )
    table = pd.DataFrame({"institution": score.index, "score": score.values})
    # Left merge keeps every scored option; options never ranked first have no
    # match and get 0 from fillna.
    table = table.merge(ranked_first, on="institution", how="left").fillna(0)
    return table.sort_values(["score", "ranked_first"])


def generate_ranking_plot(df, input_col, options, scoring):
    """Build a horizontal bar chart of weighted ranking scores.

    For each rank position ``i`` in 1..6 the values in column
    ``f"{input_col}[{i}]"`` are counted and the counts multiplied by
    ``scoring[i]``; the products are summed per value to give its score.

    Args:
        df: Dataframe holding the columns ``"<input_col>[1]"`` ...
            ``"<input_col>[6]"``.
        input_col: Column-name prefix of the ranking question.
        options: Labels used to seed the score index, so options nobody
            ranked still appear with a score of 0.
        scoring: Indexable by rank position (1..6), giving the weight for
            that position.

    Returns:
        A plotly express bar figure with ``institution`` on the y axis,
        ``score`` on the x axis, and bars coloured by how often the option
        was ranked first (viridis scale).

    Raises:
        KeyError: If one of the six rank columns is missing from *df*.
    """
    import plotly.express as px

    return px.bar(
        _ranking_table(df, input_col, options, scoring),
        y="institution",
        x="score",
        color="ranked_first",
        color_continuous_scale="viridis",
        orientation="h",
    )