Skip to content

Commit

Permalink
smaller mapping fixes, rename all -> merged, corr matrices
Browse files Browse the repository at this point in the history
  • Loading branch information
linozen committed Jul 27, 2021
1 parent 10eba58 commit c7c48b6
Show file tree
Hide file tree
Showing 13 changed files with 20,103 additions and 3,704 deletions.
48 changes: 24 additions & 24 deletions .github/workflows/deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,29 +18,29 @@ jobs:
- name: Check out the repo
uses: actions/checkout@v2

# - name: Build image
# run: docker build . --file Dockerfile --tag $IMAGE_NAME --label "runnumber=${GITHUB_RUN_ID}"

# - name: Log in to registry
# # This is where you will update the PAT to GITHUB_TOKEN
# run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin

# - name: Push image
# run: |
# IMAGE_ID=ghcr.io/${{ github.repository_owner }}/$IMAGE_NAME

# # Change all uppercase to lowercase
# IMAGE_ID=$(echo $IMAGE_ID | tr '[A-Z]' '[a-z]')
# # Strip git ref prefix from version
# VERSION=$(echo "${{ github.ref }}" | sed -e 's,.*/\(.*\),\1,')
# # Strip "v" prefix from tag name
# [[ "${{ github.ref }}" == "refs/tags/"* ]] && VERSION=$(echo $VERSION | sed -e 's/^v//')
# # Use Docker `latest` tag convention
# [ "$VERSION" == "master" ] && VERSION=latest
# echo IMAGE_ID=$IMAGE_ID
# echo VERSION=$VERSION
# docker tag $IMAGE_NAME $IMAGE_ID:$VERSION
# docker push $IMAGE_ID:$VERSION
- name: Build image
run: docker build . --file Dockerfile --tag $IMAGE_NAME --label "runnumber=${GITHUB_RUN_ID}"

- name: Log in to registry
# This is where you will update the PAT to GITHUB_TOKEN
run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin

- name: Push image
run: |
IMAGE_ID=ghcr.io/${{ github.repository_owner }}/$IMAGE_NAME
# Change all uppercase to lowercase
IMAGE_ID=$(echo $IMAGE_ID | tr '[A-Z]' '[a-z]')
# Strip git ref prefix from version
VERSION=$(echo "${{ github.ref }}" | sed -e 's,.*/\(.*\),\1,')
# Strip "v" prefix from tag name
[[ "${{ github.ref }}" == "refs/tags/"* ]] && VERSION=$(echo $VERSION | sed -e 's/^v//')
# Use Docker `latest` tag convention
[ "$VERSION" == "master" ] && VERSION=latest
echo IMAGE_ID=$IMAGE_ID
echo VERSION=$VERSION
docker tag $IMAGE_NAME $IMAGE_ID:$VERSION
docker push $IMAGE_ID:$VERSION
- name: Deploy Docker image to server using SSH
uses: appleboy/ssh-action@master
Expand All @@ -50,6 +50,6 @@ jobs:
key: ${{ secrets.KEY }}
script: |
docker pull ghcr.io/snv-berlin/streamlit-ioi-base:latest
docker run -d -p 8501:8501 ghcr.io/snv-berlin/streamlit-ioi-base:latest streamlit run --server.port 8501 explorer/all.py
docker run -d -p 8501:8501 ghcr.io/snv-berlin/streamlit-ioi-base:latest streamlit run --server.port 8501 explorer/merged.py
docker run -d -p 8502:8502 ghcr.io/snv-berlin/streamlit-ioi-base:latest streamlit run --server.port 8502 explorer/media.py
docker run -d -p 8503:8503 ghcr.io/snv-berlin/streamlit-ioi-base:latest streamlit run --server.port 8503 explorer/civsoc.py
76 changes: 0 additions & 76 deletions data/all.csv

This file was deleted.

Binary file removed data/all.pkl
Binary file not shown.
Binary file removed data/all.xlsx
Binary file not shown.
83 changes: 73 additions & 10 deletions explorer/civsoc.py
Original file line number Diff line number Diff line change
Expand Up @@ -510,8 +510,8 @@ def get_cs_df():
df["CSfoi4"] = df["CSfoi4"].replace(
{
"AO01": "Very helpful",
"AO02": "Helpful in parts",
"AO03": "Not helpful at all",
"AO03": "Helpful in parts",
"AO05": "Not helpful at all",
"AO06": "I don't know",
"AO07": "I prefer not to say",
}
Expand Down Expand Up @@ -607,6 +607,18 @@ def get_cs_df():
]
for label in CSadvocact2_options:
df[f"CSadvocact2[{label}]"] = df[f"CSadvocact2[{label}]"].replace(
{
"AO01": "Very important",
"AO02": "Important",
"AO03": "Somewhat important",
"AO04": "Slightly important",
"AO07": "Not important at all",
"AO09": "I don't know",
"AO11": "I prefer not to say",
}
)
# Coding in LimeSurvey differs for UK
df.loc[is_uk, f"CSadvocact2[{label}]"] = df[f"CSadvocact2[{label}]"].replace(
{
"AO01": "Very important",
"AO02": "Important",
Expand All @@ -618,6 +630,7 @@ def get_cs_df():
}
)


df["CSadvoctrans1"] = df["CSadvoctrans1"].replace(
{
"AO01": "Always",
Expand Down Expand Up @@ -650,6 +663,18 @@ def get_cs_df():
]
for label in CSadvocimpact1_options:
df[f"CSadvocimpact1[{label}]"] = df[f"CSadvocimpact1[{label}]"].replace(
{
"AO01": "Agree completely",
"AO42": "Agree to a great extent",
"AO43": "Agree somewhat",
"AO44": "Agree sligthly",
"AO45": "Not agree at all",
"AO46": "I don't know",
"AO47": "I prefer not to say",
}
)
# Here, only the DE survey is coded differently
df.loc[is_de, f"CSadvocimpact1[{label}]"] = df[f"CSadvocimpact1[{label}]"].replace(
{
"AO01": "Agree completely",
"AO02": "Agree to a great extent",
Expand Down Expand Up @@ -713,6 +738,20 @@ def get_cs_df():
]
for label in CSlitigateimpact1_options:
df[f"CSlitigateimpact1[{label}]"] = df[f"CSlitigateimpact1[{label}]"].replace(
{
"AO01": "Agree completely",
"AO42": "Agree to a great extent",
"AO43": "Agree somewhat",
"AO44": "Agree sligthly",
"AO45": "Not agree at all",
"AO46": "I don't know",
"AO47": "I prefer not to say",
}
)

df.loc[is_de, f"CSlitigateimpact1[{label}]"] = df[
f"CSlitigateimpact1[{label}]"
].replace(
{
"AO01": "Agree completely",
"AO02": "Agree to a great extent",
Expand Down Expand Up @@ -805,6 +844,19 @@ def get_cs_df():
}
)


df["CSprotectops4"] = df["CSprotectops4"].replace(
{
"AO01": "I have full confidence that the right tools <br>will protect my communication from surveillance",
"AO02": "Technological tools help to protect my identity <br>to some extent, but an attacker with sufficient power <br>may eventually be able to bypass my technological <br>safeguards",
"AO03": "Under the current conditions of communications <br>surveillance, technological solutions cannot offer <br>sufficient protection for the data I handle",
"AO04": "I have no confidence in the protection offered by <br>technological tools",
"AO05": "I try to avoid technology-based communication whenever <br>possible when I work on intelligence-related issues",
"AO06": "I don't know",
"AO07": "I prefer not to say",
}
)

df["CSprotectleg1"] = df["CSprotectleg1"].replace(
{
"AO01": "Always",
Expand Down Expand Up @@ -966,6 +1018,17 @@ def get_cs_df():
"AO06": "Civil society organisations",
}
)
# Here, FR is coded differently
df.loc[is_fr, f"CSattitude{i}[{j}]"] = df[f"CSattitude{i}[{j}]"].replace(
{
"AO01": "Parliamentary oversight bodies",
"AO02": "Judicial oversight bodies",
"AO03": "Independent expert bodies",
"AO04": "Data protection authorities",
"AO07": "Audit courts",
"AO06": "Civil society organisations",
}
)

df["CSgender"] = df["CSgender"].replace(
{
Expand Down Expand Up @@ -1054,7 +1117,7 @@ def get_csv_download_link(df):
b64 = base64.b64encode(
csv.encode()
).decode() # some strings <-> bytes conversions necessary here
href = f'<a href="data:file/csv;base64,{b64}" download="ioi_media_only.csv">Download as CSV file</a>'
href = f'<a href="data:file/csv;base64,{b64}" download="ioi_civsoc_only.csv">Download as CSV file</a>'
return href


Expand All @@ -1075,7 +1138,7 @@ def get_excel_download_link(df):
"""
val = to_excel(df)
b64 = base64.b64encode(val)
return f'<a href="data:application/octet-stream;base64,{b64.decode()}" download="ioi_media_only.xlsx">Download as Excel file</a>'
return f'<a href="data:application/octet-stream;base64,{b64.decode()}" download="ioi_civsoc_only.xlsx">Download as Excel file</a>'


st.write(get_csv_download_link(df), unsafe_allow_html=True)
Expand All @@ -1093,15 +1156,15 @@ def get_excel_download_link(df):


@st.cache
def generate_corr_matrix(df):
df = df.phik_matrix()
def get_corr_matrix(df):
df = pd.read_pickle("./data/civsoc_corr.pkl")
fig = px.imshow(df, zmin=0, zmax=1, color_continuous_scale="viridis", height=1300)
return fig


@st.cache
def generate_significance_matrix(df):
df = df.significance_matrix(significance_method="asymptotic")
def get_significance_matrix(df):
df = pd.read_pickle("./data/civsoc_sig.pkl")
fig = px.imshow(df, zmin=-5, zmax=5, color_continuous_scale="viridis", height=1300)
return fig

Expand All @@ -1115,7 +1178,7 @@ def generate_significance_matrix(df):
]


fig_corr = generate_corr_matrix(df_without_act)
fig_corr = get_corr_matrix(df_without_act)
st.plotly_chart(fig_corr, use_container_width=True)

st.write("# Significance Matrix")
Expand All @@ -1126,5 +1189,5 @@ def generate_significance_matrix(df):
body="$Z=\Phi^{-1}(1-p); \Phi(z)=\\frac{1}{\\sqrt{2\pi}}\int_{-\infty}^{z} e^{-t^{2}/2}\,dt$"
)

fig_sig = generate_significance_matrix(df_without_act)
fig_sig = get_significance_matrix(df_without_act)
st.plotly_chart(fig_sig, use_container_width=True)
12 changes: 6 additions & 6 deletions explorer/media.py
Original file line number Diff line number Diff line change
Expand Up @@ -965,15 +965,15 @@ def get_excel_download_link(df):


@st.cache
def generate_corr_matrix(df):
df = df.phik_matrix()
def get_corr_matrix(df):
df = pd.read_pickle("./data/media_corr.pkl")
fig = px.imshow(df, zmin=0, zmax=1, color_continuous_scale="viridis", height=1300)
return fig


@st.cache
def generate_significance_matrix(df):
df = df.significance_matrix(significance_method="asymptotic")
def get_significance_matrix(df):
df = pd.read_pickle("./data/media_sig.pkl")
fig = px.imshow(df, zmin=-5, zmax=5, color_continuous_scale="viridis", height=1300)
return fig

Expand All @@ -982,7 +982,7 @@ def generate_significance_matrix(df):
"# Correlation Matrix (Phik `φK`) \nPhik (φk) is a new and practical correlation coefficient that works consistently between categorical, ordinal and interval variables, captures non-linear dependency and reverts to the Pearson correlation coefficient in case of a bivariate normal input distribution. There is extensive documentation available [here](https://phik.readthedocs.io/en/latest/index.html)"
)

fig_corr = generate_corr_matrix(df)
fig_corr = get_corr_matrix(df)
st.plotly_chart(fig_corr, use_container_width=True)

st.write("# Significance Matrix")
Expand All @@ -993,5 +993,5 @@ def generate_significance_matrix(df):
body="$Z=\Phi^{-1}(1-p); \Phi(z)=\\frac{1}{\\sqrt{2\pi}}\int_{-\infty}^{z} e^{-t^{2}/2}\,dt$"
)

fig_sig = generate_significance_matrix(df)
fig_sig = get_significance_matrix(df)
st.plotly_chart(fig_sig, use_container_width=True)
Loading

0 comments on commit c7c48b6

Please sign in to comment.