-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstreamlit_app.py
144 lines (116 loc) · 6.62 KB
/
streamlit_app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import streamlit as st
import pandas as pd
import pydeck as pdk
import re
# Load the two source CSVs and combine them into one frame keyed on 'Year'.
# NOTE(review): Streamlit re-executes this script on each interaction, so both
# files are downloaded again every time — consider caching the load.
COUNTRY_COUNTS_URL = 'https://raw.githubusercontent.com/jyoti-sn/NSS_NLP/main/NSS_country_coded_Google.csv'
FULL_TEXTS_URL = 'https://raw.githubusercontent.com/jyoti-sn/NSS_NLP/main/US_NSS_Full_Texts.csv'

df1 = pd.read_csv(COUNTRY_COUNTS_URL)
df2 = pd.read_csv(FULL_TEXTS_URL)

# Parse 'Year' as a four-digit year, then keep only the numeric year so it
# can serve as the join key.
parsed_years = pd.to_datetime(df2['Year'], format='%Y')
df2['Year'] = parsed_years.dt.year

# Default (inner) join on 'Year'; the text is lowercased once here so later
# searches can compare against lowercase input.
df = df1.merge(df2, on='Year')
df['Text'] = df['Text'].str.lower()
# Presidential DataFrame: one row per calendar year (1987-2024) giving the
# president and party, expanded from a compact table of terms.
_PRESIDENTIAL_TERMS = [
    # (first year, last year inclusive, president, party)
    (1987, 1988, "Ronald Reagan", "Republican"),
    (1989, 1992, "George H. W. Bush", "Republican"),
    (1993, 2000, "Bill Clinton", "Democratic"),
    (2001, 2008, "George W. Bush", "Republican"),
    (2009, 2016, "Barack Obama", "Democratic"),
    (2017, 2020, "Donald Trump", "Republican"),
    (2021, 2024, "Joe Biden", "Democratic"),
]
presidents_df = pd.DataFrame(
    [
        (year, president, party)
        for first_year, last_year, president, party in _PRESIDENTIAL_TERMS
        for year in range(first_year, last_year + 1)
    ],
    columns=["Year", "President", "Party"],
)
# Country groups offered by the "Select Group" filter. The names are matched
# against the 'Country' column with isin(), so the spelling must agree with
# the data exactly.
G5_countries = [
    'United States',
    'United Kingdom',
    'France',
    'Germany',
    'Japan',
]
G20_countries = [
    'Argentina',
    'Australia',
    'Brazil',
    'Canada',
    'China',
    'France',
    'Germany',
    'India',
    'Indonesia',
    'Italy',
    'Japan',
    'Mexico',
    'Russian Federation',
    'Saudi Arabia',
    'South Africa',
    'Korea, Republic of',
    'Turkey',
    'United Kingdom',
    'United States',
    'European Union',
]
# Page setup: wide layout, with the same text used for the browser-tab title
# and the on-page heading.
PAGE_TITLE = 'How does the white house see the world?'
st.set_page_config(page_title=PAGE_TITLE, layout="wide")
st.title(PAGE_TITLE)
st.subheader("Analysis of the US National Security Strategy Document: 1987-2022")
# Sidebar layout: year, United States filter and country-group controls.
with st.sidebar:
    # Years offered for selection (presumably the years present in the data).
    available_years = [1987, 1988, 1990, 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2002, 2006, 2010, 2015, 2017, 2022]
    # select_slider restricts the choice to the listed years. A plain numeric
    # slider between min and max would also offer the gap years (1989, 2003,
    # ...), leaving every chart below with an empty selection.
    selected_year = st.select_slider('Select a year:', options=available_years, value=available_years[0])
    # Country filter for excluding or including the United States
    country_option = st.selectbox('Filter countries:', ['All Countries', 'Exclude United States'])
    # Group Selection for G-5 or G-20
    group_option = st.radio("Select Group:", ('G-5', 'G-20'))
# Keep only the rows of the merged dataset that belong to the chosen year.
year_mask = df['Year'] == selected_year
year_filtered_df = df[year_mask]

# Show who was president in that year, when the year is in the lookup table.
president_info = presidents_df[presidents_df['Year'] == selected_year]
if len(president_info) > 0:
    st.write(f"President in {selected_year}: {president_info['President'].values[0]} ({president_info['Party'].values[0]} Party)")

# Optionally drop the United States rows.
if country_option == 'Exclude United States':
    year_filtered_df = year_filtered_df.loc[year_filtered_df['Country'].ne('United States')]

# Restrict to the selected group; any option other than 'G-5' means G-20.
group_members = G5_countries if group_option == 'G-5' else G20_countries
group_df = year_filtered_df[year_filtered_df['Country'].isin(group_members)]
# Total mentions for the year and each group country's share of that total.
total_count_year = year_filtered_df['Count'].sum()
group_counts = group_df.groupby('Country')['Count'].sum()
if total_count_year > 0:
    group_percentage = group_counts / total_count_year * 100
else:
    # Nothing was counted: report 0% instead of the NaN produced by 0 / 0.
    group_percentage = group_counts * 0.0

# Display the heatmap
# Aggregate only 'Count'. A bare .sum() over the whole frame would also add
# up 'Year' and, depending on the pandas version, concatenate every string
# column (including the full document 'Text') for each country, bloating the
# data handed to pydeck.
heatmap_data = (
    year_filtered_df
    .groupby(['Latitude', 'Longitude', 'Country'])['Count']
    .sum()
    .reset_index()
)
if heatmap_data.empty:
    # With no rows the mean latitude/longitude would be NaN, which is not a
    # usable map centre, so skip the map and say why.
    st.info(f"No country mentions available for {selected_year}.")
else:
    layer = pdk.Layer(
        'HeatmapLayer',
        data=heatmap_data,
        opacity=0.9,
        get_position=['Longitude', 'Latitude'],
        get_weight='Count',
        radius_pixels=60,
    )
    # Centre the initial view on the average position of the plotted points.
    view_state = pdk.ViewState(
        latitude=heatmap_data['Latitude'].mean(),
        longitude=heatmap_data['Longitude'].mean(),
        zoom=1,
    )
    st.pydeck_chart(pdk.Deck(layers=[layer], initial_view_state=view_state, tooltip={
        "html": "<b>{Country}</b><br>Mentions: {Count}",
        "style": {"color": "white", "font-family": "Arial", "font-size": "12px", "padding": "10px"}
    }))
# Country-level bar charts, side by side: absolute mentions on the left and
# the selected group's percentage shares on the right.
col1, col2 = st.columns(2)
with col1:
    st.header("Total Mentions by Country")
    bar_chart_data = year_filtered_df.groupby('Country')['Count'].sum()
    st.bar_chart(bar_chart_data, use_container_width=True)
    st.write(f"Total mentions in {selected_year}: {int(total_count_year)}")
with col2:
    st.header(f"{group_option} Countries' Mention Percentages")
    # The chart handle is not needed afterwards, so it is not bound to a name.
    st.bar_chart(group_percentage, use_container_width=True)
    st.write(f"Total mentions for {group_option} countries in {selected_year}: {int(group_df['Count'].sum())}")
# Continent-level view: absolute mentions and each continent's share of the
# year's total, shown in two side-by-side columns.
st.header("Total Mentions by Continent")
continent_mentions = year_filtered_df.groupby('Continent')['Count'].sum()
continent_percentage = continent_mentions / total_count_year * 100

continent_panels = (
    ("Total Mentions", continent_mentions),
    ("Mention Percentage", continent_percentage),
)
for panel_column, (panel_title, panel_data) in zip(st.columns(2), continent_panels):
    with panel_column:
        st.subheader(panel_title)
        st.bar_chart(panel_data, use_container_width=True)
# Word search over time: how often a user-supplied term occurs in each
# year's document, and how that splits between the two parties.
st.subheader("Word Frequency Over Time")
search_word = st.text_input("Enter a word to search:").strip().lower()
if search_word:
    # Match the input literally; unescaped characters such as "(" or "*"
    # would otherwise be interpreted as a regular expression and could raise.
    pattern = re.escape(search_word)

    # `df` repeats each year's text once per country row, so reduce it to one
    # row per distinct (year, text) pair before counting. Rows without text
    # are skipped.
    documents = df[['Year', 'Text']].dropna(subset=['Text']).drop_duplicates()
    documents = documents.assign(
        Occurrences=documents['Text'].str.count(pattern),
        Words=documents['Text'].str.split().str.len(),
    )

    # Normalise by document length so long and short documents are comparable.
    word_counts = documents.groupby('Year', as_index=False)[['Occurrences', 'Words']].sum()
    word_counts = word_counts[word_counts['Words'] > 0]
    word_counts['Frequency'] = word_counts['Occurrences'] / word_counts['Words'] * 1000

    st.line_chart(data=word_counts, x='Year', y='Frequency')
    st.write(f"Normalised frequency of the word '{search_word}' in the NSS documents over the years (occurrences per 1,000 words).")

    # Compare parties by the average normalised frequency across the years
    # in which each party held the presidency.
    party_frequency = (
        word_counts.merge(presidents_df, on='Year')
        .groupby('Party')['Frequency']
        .mean()
    )
    republican_mentions = party_frequency.get('Republican', 0.0)
    democratic_mentions = party_frequency.get('Democratic', 0.0)
    total_mentions = republican_mentions + democratic_mentions
    if total_mentions > 0:
        republican_percentage = (republican_mentions / total_mentions) * 100
        democratic_percentage = (democratic_mentions / total_mentions) * 100
        # Display likelihood in large, bold font
        st.markdown(f"<div style='font-size: 24px; font-weight: bold;'>Republican: {republican_percentage:.1f}%</div>", unsafe_allow_html=True)
        st.markdown(f"<div style='font-size: 24px; font-weight: bold;'>Democratic: {democratic_percentage:.1f}%</div>", unsafe_allow_html=True)
    else:
        st.write("The word was not found in the dataset.")