Skip to content

Commit 8c4cb99

Browse files
committed
fixed model errors and edited code to fix nltk error, still need HN_100k csv
1 parent 813e82c commit 8c4cb99

8 files changed

+39
-17
lines changed

Pipfile

+4
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@ fastapi = "*"
1616
pandas = "*"
1717
uvicorn = "*"
1818
plotly = "*"
19+
nltk = "*"
20+
re = "*"
21+
vaderSentiment = "*"
22+
warnings = "*"
1923

2024
[requires]
2125
python_version = "3.7"
337 Bytes
Binary file not shown.
97 Bytes
Binary file not shown.

app/api/model.py

+20-11
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,3 @@
1-
# -*- coding: utf-8 -*-
2-
"""
3-
Created on Wed Jul 29 17:07:08 2020
4-
5-
@author: Ronin
6-
"""
7-
81
from warnings import filterwarnings
92
filterwarnings("ignore")
103

@@ -16,6 +9,9 @@
169
import urllib
1710
import re
1811
import pandas as pd
12+
import os
13+
14+
1915

2016

2117
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
@@ -24,9 +20,9 @@
2420
lemmatizer = WordNetLemmatizer()
2521
stemmer = PorterStemmer()
2622

27-
#If you get stopwords error please uncomment the following two lines.
28-
# nltk.download('stopwords')
29-
# nltk.download('wordnet')
23+
# Download the NLTK 'stopwords' and 'wordnet' data; comment out the following two lines once they are installed.
24+
nltk.download('stopwords')
25+
nltk.download('wordnet')
3026

3127
def get_compund_score(text):
3228
score = analyzer.polarity_scores(text)
@@ -96,8 +92,21 @@ def get_cummulative_score_for_user(username):
9692
return score
9793

9894

95+
def get_scores_by_user():
    """Return the summed ``clean_vader_score`` per user, lowest total first.

    Reads ``hn_sentiments100k.csv`` from the current working directory,
    groups its rows by the ``by`` column, sums ``clean_vader_score`` within
    each group and sorts the result in ascending order of that sum.

    Returns:
        A pandas DataFrame indexed by ``by`` with a single
        ``clean_vader_score`` column holding the per-group sums.

    Raises:
        FileNotFoundError: If the CSV file is not present in the current
            working directory (raised by ``pandas.read_csv``).
    """
    # Named `base_dir` (not `dir`) so the builtin `dir` is not shadowed.
    base_dir = os.getcwd()
    file_name = 'hn_sentiments100k.csv'
    csv_path = os.path.normcase(os.path.join(base_dir, file_name))

    df = pd.read_csv(csv_path)
    totals = df.groupby(['by']).agg({'clean_vader_score': "sum"})
    return totals.sort_values(['clean_vader_score'], ascending=True)
103+
104+
100105
def main():
106+
107+
out = get_scores_by_user()
108+
print (out.head(10))
109+
101110
id = 23970146
102111
print(f"Score for the comment id {id} is: ", get_score_by_comment_id(id))
103112

@@ -114,4 +123,4 @@ def main():
114123

115124

116125
if __name__ == "__main__":
117-
main()
126+
main()

app/api/predict.py

+8-6
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
from pydantic import BaseModel, Field, validator
88

9-
import model
9+
from .model import *
1010

1111
log = logging.getLogger(__name__)
1212

@@ -19,8 +19,8 @@ class Item(BaseModel):
1919

2020

2121
comment_id : int = Field(..., example=23970146)
22-
user_name : str = Field(..., example='gmfawcett')
23-
22+
#user_name : str = Field(..., example='gmfawcett')
23+
#comment_id_list : list = Field(..., example=[23970146,457634])
2424

2525
def to_df(self):
2626
"""Convert pydantic object to pandas dataframe with 1 row."""
@@ -41,10 +41,12 @@ async def predict(item: Item):
4141
X_new = item.to_df()
4242
log.info(X_new)
4343

44-
y_pred = model.get_score_by_comment_id(item.comment_id)
45-
U_pred = model.get_cummulative_score_for_user(item.user_name)
44+
y_pred = get_score_by_comment_id(item.comment_id)
45+
#U_pred = get_cummulative_score_for_user(item.user_name)
46+
l_pred = get_scores_by_user()
4647
return {
4748

4849
'Score for comment from id': y_pred,
49-
'user cumulative comment score': U_pred
50+
#'user cumulative comment score': U_pred,
51+
'Score for list of comment ids': l_pred
5052
}
File renamed without changes.
File renamed without changes.

requirements.txt

+7
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,10 @@ urllib
22
json
33
pandas
44
logging
5+
fastapi
6+
pandas
7+
plotly
8+
nltk
9+
re
10+
vaderSentiment
11+
warnings

0 commit comments

Comments
 (0)