satzify/app.py at main · michdr/satzify · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import streamlit as st
from annotated_text import annotated_text
from spacy_streamlit import visualize_parser

from satzify import constants
from satzify.helpers import (
    get_annotated_text,
    get_doc,
    get_tokens_df,
    load_spacy_nlp_model,
    split_annotated_text,
)

# Colour chosen for each annotation key. The sidebar sections further down
# fill this mapping and the annotation views read from it.
colors = {}

# Page configuration and the text input come first; everything after this
# point works on `text`.
st.set_page_config(page_title="satzify - simply annotate sentences")
text = st.text_area("Input text", value=constants.EXAMPLE_TEXT)

# NOTE(review): `nlp` is not referenced again in this file -- presumably the
# call is kept so the model is loaded up front; confirm in satzify.helpers.
nlp = load_spacy_nlp_model()
tokens_df = get_tokens_df(text)

# Sidebar top part
# Streamlit 0.86 renamed `beta_expander` / `beta_columns` to `expander` /
# `columns`, and the beta aliases were removed in a later release. Pick
# whichever name the installed version provides so the app runs on both.
# The check is made on the `st` module because a missing module attribute is
# reported reliably by `hasattr`; only the selected branch is evaluated.
_sidebar_expander = (
    st.sidebar.expander if hasattr(st, "expander") else st.sidebar.beta_expander
)
_sidebar_columns = (
    st.sidebar.columns if hasattr(st, "columns") else st.sidebar.beta_columns
)

# Collapsed "about" blurb.
with _sidebar_expander("What is satzify", expanded=False):
    st.markdown(constants.WHAT_IS_SATZIFY)

# Two-column header row: a fixed label on the left, the language on the right.
sb_header_cols = _sidebar_columns(2)
sb_header_cols[0].title("Language")
sb_header_cols[1].title(constants.LANGUAGE)


def display_sidebar_for_category(category: str, title: str) -> tuple:
    """Render the sidebar controls for one annotation category.

    Draws a title, one colour picker per entry of
    ``constants.ANNOTATIONS[category]`` (alternating between two columns) and
    a multiselect listing every entry's display name, all selected by default.

    Args:
        category: Key into ``constants.ANNOTATIONS``; also used in the
            multiselect label.
        title: Heading shown above the controls.

    Returns:
        A ``(colors, annotated_names)`` tuple. ``colors`` maps each
        annotation key of this category to the colour currently picked for
        it; ``annotated_names`` is the list of names selected in the
        multiselect.

    Raises:
        KeyError: If ``category`` is not a key of ``constants.ANNOTATIONS``.
    """
    category_settings = constants.ANNOTATIONS[category]
    st.sidebar.title(title)

    # `beta_columns` was renamed to `columns` in Streamlit 0.86 and the beta
    # alias was removed in a later release; use whichever one exists. The
    # check is on the `st` module, where `hasattr` is reliable.
    make_columns = (
        st.sidebar.columns if hasattr(st, "columns") else st.sidebar.beta_columns
    )
    picker_columns = make_columns(2)

    # Build a fresh mapping instead of writing into module-level state: the
    # caller decides where the picked colours are merged.
    picked_colors = {
        key: picker_columns[index % 2].color_picker(
            settings["name"], settings["color"]
        )
        for index, (key, settings) in enumerate(category_settings.items())
    }

    names = [settings["name"] for settings in category_settings.values()]
    annotated_names = st.sidebar.multiselect(
        f"{category} to annotate", names, default=names
    )

    return picked_colors, annotated_names


# Parts-of-speech controls: merge the picked colours into the shared mapping
# and remember which names are selected for annotation.
pos_colors, pos_annotated_names = display_sidebar_for_category(
    category="POS", title="Parts of speech"
)
colors.update(pos_colors)

# Cases-and-genders controls, handled the same way.
cases_colors, cases_annotated_names = display_sidebar_for_category(
    category="CASES", title="Cases and genders"
)
colors.update(cases_colors)


# Main part - outputs
# `st.beta_expander` was renamed to `st.expander` in Streamlit 0.86 and the
# beta alias was removed in a later release; use whichever one is available.
_expander = st.expander if hasattr(st, "expander") else st.beta_expander

# One expander per annotation category, each rendering the annotated text in
# the chunks produced by `split_annotated_text`.
for _category, _label, _selected_names in (
    ("POS", "Parts of speech annotations", pos_annotated_names),
    ("CASES", "Cases and genders annotations", cases_annotated_names),
):
    with _expander(_label):
        list_annotated_text = split_annotated_text(
            get_annotated_text(_category, text, _selected_names, colors)
        )
        for annotations in list_annotated_text:
            annotated_text(*annotations)

# Table built from the input text by `get_tokens_df`.
with _expander("Text tokens table", expanded=False):
    st.dataframe(tokens_df)

# Parser visualisation from spacy_streamlit, without its default title.
with _expander("Visualise parser", expanded=False):
    visualize_parser(get_doc(text), title=None)