Skip to content

Commit

Permalink
python code modified as per pylint suggestions
Browse files Browse the repository at this point in the history
  • Loading branch information
bushraqurban committed Jan 25, 2025
1 parent 5ed32f0 commit a88840e
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 21 deletions.
9 changes: 5 additions & 4 deletions app.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
"""
This module contains the main Flask application for the course recommendation system.
"""

from flask import Flask, render_template, request
import os
from data_loader import load_clean_data
from helper import get_recommendations

Expand Down Expand Up @@ -27,16 +30,14 @@ def recommend():
"""
Recommendation page route. Processes the user's input and provides course recommendations.
This route receives a course name from the form on the home page and uses it to find similar courses.
This route receives a course name from the form and uses it to find similar courses.
It then renders the 'recommendations.html' template with the recommendations.
Returns:
- Rendered HTML template for the recommendations page with the recommended courses.
"""
course_name = request.form['course_name']

recommendations = get_recommendations(course_name, data, similarity_matrix)

return render_template('recommendations.html', recommendations=recommendations)

if __name__ == '__main__':
Expand Down
20 changes: 12 additions & 8 deletions data_loader.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
"""
Module to load and clean course data and similarity matrix.
"""

import os
import pickle
import pandas as pd
Expand All @@ -8,7 +12,8 @@ def load_clean_data():
This function loads the precomputed similarity matrix from a pickle file and the course data from a CSV file.
It handles any potential errors during file loading and logs them for debugging.
It also ensures that the data does not have duplicates and that non-ASCII characters in the data are removed.
It also ensures that the data does not have duplicates and that non-ASCII characters in the data are
removed.
Returns:
- data: Clean DataFrame containing course information.
Expand All @@ -18,23 +23,22 @@ def load_clean_data():
try:
# Get the absolute path to the current directory (where the script is located)
current_dir = os.path.dirname(os.path.abspath(__file__))

# Build paths to the necessary files in the 'models' and 'data' directories
similarity_matrix_path = os.path.join(current_dir, 'models', 'similarity_matrix.pkl')
data_path = os.path.join(current_dir, 'data', 'coursera.csv')

# Load the files
similarity_matrix = pickle.load(open(similarity_matrix_path, 'rb'))
with open(similarity_matrix_path, 'rb') as f:
similarity_matrix = pickle.load(f)
data = pd.read_csv(data_path, encoding='utf-8')

except (FileNotFoundError, pickle.UnpicklingError, pd.errors.EmptyDataError) as e:
# Log the error and raise an exception if loading fails
print(f"Error loading files: {e}")
raise Exception(f"Error loading files: {e}")

# Drop duplicates from the course data based on key columns
data = data.drop_duplicates(subset=['Course Name', 'University', 'Difficulty Level', 'Course Rating', 'Course URL', 'Course Description'])

data = data.drop_duplicates(subset=['Course Name', 'University', 'Difficulty Level',
'Course Rating', 'Course URL', 'Course Description'])

# Function to remove non-ASCII characters
def remove_non_ascii(text):
return text.encode('ascii', 'ignore').decode('ascii') if isinstance(text, str) else text
Expand Down
20 changes: 11 additions & 9 deletions helper.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import pandas as pd
import re

"""
Module for generating course recommendations based on similarity scores.
"""

def normalize_rating(rating_str):
"""
Normalize the course rating to a 0-1 scale.
Expand All @@ -13,7 +14,8 @@ def normalize_rating(rating_str):
except ValueError:
return 0 # Return 0 if the rating is invalid

def get_recommendations(course_name, data, similarity_matrix, top_n=6, threshold=90, rating_weight=0.05):
def get_recommendations(course_name, data, similarity_matrix, top_n=6,
rating_weight=0.05):
"""
Get top N course recommendations based on similarity to the given course name.
Expand All @@ -31,12 +33,12 @@ def get_recommendations(course_name, data, similarity_matrix, top_n=6, threshold
"""
course_name = data[data['Course Name'] == course_name] # Filter data for selected course
course_idx = course_name.index[0] # Get the index of the selected course
similarity_scores = list(enumerate(similarity_matrix[course_idx])) # Get similarity scores for all courses
similarity_scores = list(enumerate(similarity_matrix[course_idx])) # Get similarity for courses

recommendations = []
for idx, similarity_score in sorted(similarity_scores, key=lambda x: x[1], reverse=True)[:top_n]:
course_data = data.iloc[idx] # Get course data for the current recommendation
normalized_rating = normalize_rating(course_data.get('Course Rating', '0')) # Normalize rating
normalized_rating = normalize_rating(course_data.get('Course Rating', '0'))

# Prepare recommendation dictionary with relevant course information
recommendations.append({
Expand All @@ -46,8 +48,8 @@ def get_recommendations(course_name, data, similarity_matrix, top_n=6, threshold
"institution": course_data.get('University', 'Unknown'),
"difficulty_level": course_data.get('Difficulty Level', 'Unknown'),
"similarity": similarity_score,
"final_score": similarity_score * (1 - rating_weight) + normalized_rating * rating_weight # Weighted final score
"final_score": similarity_score * (1 - rating_weight) + normalized_rating * rating_weight
})

# Return sorted recommendations based on the final score
return sorted(recommendations, key=lambda x: x['final_score'], reverse=True)
return sorted(recommendations, key=lambda x: x['final_score'], reverse=True)

0 comments on commit a88840e

Please sign in to comment.