-
Notifications
You must be signed in to change notification settings - Fork 0
/
2023_wrapped.py
159 lines (124 loc) · 10.1 KB
/
2023_wrapped.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
# Importing necessary libraries
import requests
import duckdb
import streamlit as st
import plotly.express as px
import re
# Set Streamlit page configuration for layout
st.set_page_config(layout='wide')
# Streamlit page setup
col1, col2, col3 = st.columns(3)
col2.title('2023 Wrapped for Gitcoin Donors')
col2.markdown('2023 may be over, but the impact of your donations will continue to burst into the new year. '
'Enter your address below to see your year in review as a Gitcoin donor!')
col2.markdown('For other wins for Gitcoin and community, check out [2023: Year in Review](https://2023.gitcoin.co/). To report issues, ping on Telegram [here](https://t.me/rohitmalekar).')
# Fetch the latest IPFS CID from a specified URL
#LATEST_IPFS_CID_URL = 'https://raw.githubusercontent.com/davidgasquez/gitcoin-grants-data-portal/main/data/IPFS_CID'
#LATEST_IPFS_CID = requests.get(LATEST_IPFS_CID_URL).text.strip()
# Construct the gateway URL
#GATEWAY_URL = f'https://ipfs.filebase.io/ipfs/{LATEST_IPFS_CID}/'
GATEWAY_URL = "https://k51qzi5uqu5dhn3p5xdkp8n6azd4l1mma5zujinkeewhvuh5oq4qvt7etk9tvc.ipns.cf-ipfs.com/data"
# Get the Ethereum address from user input
address = col2.text_input('Paste your Ethereum address here starting "0x":',
help='ENS not supported, please enter 42-character hexadecimal address starting with "0x"')
# Process if a valid address is entered
if address != 'None':
# Display progress bar
progress_text = 'Looking up! Please wait.'
my_bar = col2.progress(0, text=progress_text)
# Check for valid Ethereum address
if not re.match(r'^(0x)?[0-9a-f]{40}$', address, flags=re.IGNORECASE):
col2.error('Not a valid address. Please enter a valid 42-character hexadecimal Ethereum address starting with "0x"')
my_bar.empty()
else:
my_bar.progress(10, text='Valid address found. Searching for your contributions...:mag_right:. Hang tight!')
# Database querying
QUERY = f"""
SELECT
votes.donor_address AS voter,
rounds.round_metadata_name AS round,
strftime('%B %Y', CAST(donations_start_time AS TIMESTAMP)) AS round_start,
projects.title AS project,
any_value(projects.project_twitter) AS project_twitter,
round(sum(votes.amount_in_usd), 0) AS amount
FROM '{GATEWAY_URL}/allo_donations.parquet' AS votes,
'{GATEWAY_URL}/allo_rounds.parquet' AS rounds,
'{GATEWAY_URL}/allo_projects.parquet' AS projects
WHERE votes.round_id = rounds.id
AND votes.project_id = projects.id
AND strftime('%Y', CAST(donations_start_time AS TIMESTAMP)) = '2023'
GROUP BY votes.donor_address, rounds.round_metadata_name, strftime('%B %Y', CAST(donations_start_time AS TIMESTAMP)), projects.title
"""
# All results for 2023
query_all_result = duckdb.sql(QUERY).df()
# All results for the voter for 2023
query_result = query_all_result.query(f"voter == '{address.lower()}'")
# Process query results
if query_result.empty:
col2.error('Sorry, no records found for this address. Please try another address.')
my_bar.empty()
else:
my_bar.progress(90, text='Compiling results for display...')
# Append round name with round start (month and year) to distinguish recurring program rounds
query_result['unique_round'] = query_result['round'] + ' (' + query_result['round_start'] + ')'
# Statistics on unique number of projects, total donations, number of rounds supported
project_num = query_result['project'].nunique()
total_donation = query_result['amount'].sum()
rounds_num = query_result['unique_round'].nunique()
my_bar.progress(100, text='Completed!')
my_bar.empty()
# Display summary statistics
msg = f'## Congratulations! In 2023, you have contributed ${total_donation} to {project_num} projects in {rounds_num} rounds!'
col2.markdown(msg)
# Create and display a sunburst chart
fig = px.sunburst(query_result, path=['unique_round', 'project'], values='amount')
fig.update_layout(width=1000, height=1000)
bcol1, bcol2, bcol3 = st.columns(3)
bcol2.markdown('### Check out your 2023 contribution burst:')
bcol2.markdown('*Tap on a round to drill down*')
st.plotly_chart(fig, use_container_width=True)
# Aggregate data for top rounds and projects
rounds = query_result[['unique_round', 'amount']]
rounds_of_voter = rounds.groupby(['unique_round']).sum()
projects = query_result[['project', 'amount']]
projects_of_voter = projects.groupby(['project']).sum()
# Display top rounds and projects
lcol1, lcol2, lcol3, lcol4 = st.columns([1, 1, 1, 1])
lcol2.markdown('### Your top rounds:')
lcol2.dataframe(rounds_of_voter.sort_values(by="amount", ascending=False).head(5), column_config={"unique_round": st.column_config.SelectboxColumn(label='Round',width="medium"), "amount": st.column_config.NumberColumn(label='$', format='$%.0f')})
lcol3.markdown('### Your top projects:')
lcol3.dataframe(projects_of_voter.sort_values(by="amount", ascending=False).head(5), column_config={"project": st.column_config.SelectboxColumn(label='Project',width="medium"),"amount": st.column_config.NumberColumn(label='$', format='$%.0f')})
st.balloons()
# Display personalized recommendations for 2024
bot_col1, bot_bcol2, bot_bcol3 = st.columns(3)
bot_bcol2.divider()
bot_bcol2.markdown('## 2024 Recommendations')
# Processing for recommendations
# Step 1: Find top 5 projects supported by the voter
top_projects_of_voter = projects_of_voter.sort_values(by='amount', ascending=False).head(5).reset_index()
top_projects_of_voter_list = top_projects_of_voter['project'].tolist()
# Step 2: Find all other voters who also supported the top projects of this voter
voter_cluster = query_all_result[query_all_result['project'].isin(top_projects_of_voter_list)]
voter_cluster_list = voter_cluster['voter'].unique().tolist()
bot_bcol2.markdown("We looked at contributions of voters who support some of your most favorite projects to find out what else they contribute to that you don't. May be you know about these projects, may be you don't - we just thought you might like a sneak peek at this list as we gear up for Gitcoin Grants for 2024.")
bot_bcol2.markdown(f'A total of {len(voter_cluster_list)} unique voters also supported the top 5 projects you contributed to in 2023.')
# Step 3: Find all other projects supported by this voter cluster to base recommendations off of based on (a) contribution amount (b) number of votes
projects_of_voter = projects_of_voter.reset_index()
projects_of_cluster = query_all_result[query_all_result['voter'].isin(voter_cluster_list) & ~query_all_result['project'].isin(projects_of_voter['project'].tolist())]
top_projects_of_cluster_amount = projects_of_cluster[['project', 'project_twitter', 'amount']].groupby(['project', 'project_twitter']).sum()
top_projects_of_cluster_vote = projects_of_cluster[['project', 'project_twitter', 'amount']].groupby(['project', 'project_twitter']).count()
top_projects_of_cluster_amount = top_projects_of_cluster_amount.reset_index()
top_projects_of_cluster_amount['project_twitter'] = 'https://twitter.com/' + top_projects_of_cluster_amount['project_twitter'].astype(str)
top_projects_of_cluster_vote = top_projects_of_cluster_vote.reset_index()
top_projects_of_cluster_vote['project_twitter'] = 'https://twitter.com/' + top_projects_of_cluster_vote['project_twitter'].astype(str)
# bot_bcol2.markdown(f'These voters also contributed to {top_projects_of_cluster_amount["project"].nunique()} other projects that you did not vote for.')
bot_bcol2.markdown('Here are the top projects these voters supported that you may be interested in for your contributions in 2024:')
bot_bcol2.markdown('### Sorted by contribution amount:')
bot_bcol2.caption('The following projects received the most donations from voters who also supported the top 5 projects you contributed to in 2023.')
bot_bcol2.dataframe(top_projects_of_cluster_amount.sort_values(by='amount', ascending=False).head(10),hide_index=True , column_order=("project","project_twitter"), column_config={"project": st.column_config.SelectboxColumn(label='Project',width="medium"),"project_twitter": st.column_config.LinkColumn(label='Twitter') })
bot_bcol2.markdown('### Sorted by number of votes:')
bot_bcol2.caption('The following projects received the most number of votes from voters who also supported the top 5 projects you contribute to in 2023.')
bot_bcol2.dataframe(top_projects_of_cluster_vote.sort_values(by='amount', ascending=False).head(10), hide_index=True , column_order=("project","project_twitter"), column_config={"project": st.column_config.SelectboxColumn(label='Project',width="medium"),"project_twitter": st.column_config.LinkColumn(label='Twitter') })
bot_bcol2.divider()
bot_bcol2.markdown('If you like what you see here, contribute to **Gitcoin Grants Data Portal** and **GrantsScope** next time you see these projects on Gitcoin.')
bot_bcol2.markdown('*This is one of the experiments for the project [GrantsScope](http://grantsscope.xyz/) aiming to improve the discoverability of projects and reduce information asymmetry in public goods funding. Credits to [Gitcoin Grants Data Portal](https://davidgasquez.github.io/gitcoin-data/) by [David Gasquez](https://twitter.com/davidgasquez) that serves as the data staging layer for this reporting.*')