From b33dcbb9ebf33a0937f79a079ec000ce81632b39 Mon Sep 17 00:00:00 2001 From: bushraqurban Date: Sun, 26 Jan 2025 17:29:11 +0500 Subject: [PATCH] relaced local notebook with kaggle notebook --- notebooks/model_training.ipynb | 778 +++++++++++++++++++++++++++++---- 1 file changed, 684 insertions(+), 94 deletions(-) diff --git a/notebooks/model_training.ipynb b/notebooks/model_training.ipynb index 56a7012..745c047 100644 --- a/notebooks/model_training.ipynb +++ b/notebooks/model_training.ipynb @@ -2,41 +2,166 @@ "cells": [ { "cell_type": "markdown", - "id": "c76eba78", - "metadata": {}, + "id": "16b8f24e", + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-26T11:18:19.180530Z", + "iopub.status.busy": "2025-01-26T11:18:19.180161Z", + "iopub.status.idle": "2025-01-26T11:18:19.267703Z", + "shell.execute_reply": "2025-01-26T11:18:19.266170Z", + "shell.execute_reply.started": "2025-01-26T11:18:19.180503Z" + }, + "papermill": { + "duration": 0.006668, + "end_time": "2025-01-26T12:19:33.798600", + "exception": false, + "start_time": "2025-01-26T12:19:33.791932", + "status": "completed" + }, + "tags": [] + }, "source": [ - "

📊 Course Recommender System on Web App

\n", - "\n" + "
\n", + " \n", + "
\n", + " 🎓 Coursera Course Recommender System\n", + "
\n", + "
\n", + "\n", + "

🎯 Purpose

\n", + "

This notebook demonstrates how to build a Coursera Course Recommendation System using machine learning techniques. It recommends relevant courses by combining text data (course names, descriptions, and skills) with course ratings.

\n", + "\n", + "
\n", + "\n", + "

🔗 Useful Links

\n", + "\n", + "\n", + "
\n", + "\n", + "

πŸ—‚οΈ Dataset Overview

\n", + "\n", + "\n", + "
\n", + "\n", + "

🎓 Walkthrough of This Notebook

\n", + "

This notebook is a step-by-step guide to building a recommendation system. Here's the high-level process:

\n", + "\n", + "

1. Import Essential Libraries

\n", + "

We start by importing libraries like `pandas`, `scikit-learn`, and `nltk` to process the data and perform the machine learning tasks.

\n", + "
import numpy as np\n",
+    "import pandas as pd\n",
+    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
+    "from sklearn.metrics.pairwise import cosine_similarity\n",
+    "from nltk.stem import WordNetLemmatizer\n",
+    "from sklearn.decomposition import TruncatedSVD\n",
+    "import pickle\n",
+    "import nltk\n",
+    "import re\n",
+    "from nltk.corpus import wordnet
\n", + "\n", + "

2. Load the Dataset

\n", + "

We load the dataset containing 3,524 Coursera courses into a `pandas` DataFrame and inspect it for structure, missing values, and duplicates.

\n", + "
data = pd.read_csv(\"/kaggle/input/coursera-courses-dataset-2021/Coursera.csv\", encoding='utf-8')\n",
+    "data.head()
\n", + "\n", + "

3. Data Preprocessing

\n", + "

We clean the course name, description, and skills columns by removing unwanted characters, converting text to lowercase, and lemmatizing each word.

\n", + "
def clean_for_tags(text):\n",
+    "    text = re.sub(r'οΏ½οΏ½+', '', text) \n",
+    "    text = re.sub(r'[^\\x00-\\x7F]+', '', text) \n",
+    "    text = re.sub(r'[^a-zA-Z\\s]', '', text) \n",
+    "    text = text.lower()  \n",
+    "    text = ' '.join([lemmatizer.lemmatize(word) for word in text.split()])  \n",
+    "    return text
\n", + "\n", + "

4. Text Vectorization with TF-IDF

\n", + "

We convert text data into numerical vectors using the TF-IDF vectorizer, which helps machine learning models understand the importance of each term.

\n", + "
vectorizer = TfidfVectorizer(stop_words='english', max_features=5000)\n",
+    "tfidf_matrix = vectorizer.fit_transform(training_data['tags'])
\n", + "\n", + "

5. Dimensionality Reduction with SVD

\n", + "

To make the data more manageable, we reduce the TF-IDF matrix to 100 components using truncated Singular Value Decomposition (SVD), which shrinks the feature space used in the later similarity computation.

\n", + "
svd = TruncatedSVD(n_components=100, random_state=42)\n",
+    "tfidf_matrix = svd.fit_transform(tfidf_matrix)
\n", + "\n", + "

6. Cosine Similarity

\n", + "

We calculate the cosine similarity between courses based on their SVD-reduced TF-IDF vectors. This helps us identify courses that are most similar to each other.

\n", + "
similarity_matrix = cosine_similarity(tfidf_matrix)
\n", + "\n", + "

7. Get Recommendations

\n", + "

We create a function that returns the top N courses based on their similarity to a selected course. Course ratings are also taken into account to improve recommendations.

\n", + "
def get_recommendations(course_name, data, similarity_matrix, top_n=3, rating_weight=0.05):
\n", + "\n", + "

8. Save the Model

\n", + "

Finally, we save the similarity matrix using `pickle` for future use, so we don’t have to recompute it each time.

\n", + "
pickle.dump(similarity_matrix, open('similarity_matrix.pkl', 'wb'))
\n" ] }, { "cell_type": "markdown", - "id": "94e56f93", - "metadata": {}, + "id": "f04f9d99", + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-25T20:26:05.929949Z", + "iopub.status.busy": "2025-01-25T20:26:05.929565Z", + "iopub.status.idle": "2025-01-25T20:26:05.937167Z", + "shell.execute_reply": "2025-01-25T20:26:05.935893Z", + "shell.execute_reply.started": "2025-01-25T20:26:05.929922Z" + }, + "papermill": { + "duration": 0.005713, + "end_time": "2025-01-26T12:19:33.810523", + "exception": false, + "start_time": "2025-01-26T12:19:33.804810", + "status": "completed" + }, + "tags": [] + }, "source": [ - "

Import Dependencies

" + "
\n", + " \n", + "
\n", + " | Importing Essential Tools\n", + "
\n", + "
\n", + "\n" ] }, { "cell_type": "code", "execution_count": 1, - "id": "afeb49d3", - "metadata": {}, + "id": "24cd1540", + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-26T12:19:33.823735Z", + "iopub.status.busy": "2025-01-26T12:19:33.823343Z", + "iopub.status.idle": "2025-01-26T12:19:37.294461Z", + "shell.execute_reply": "2025-01-26T12:19:37.293081Z" + }, + "papermill": { + "duration": 3.48003, + "end_time": "2025-01-26T12:19:37.296390", + "exception": false, + "start_time": "2025-01-26T12:19:33.816360", + "status": "completed" + }, + "tags": [] + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ + "[nltk_data] Downloading package wordnet to /usr/share/nltk_data...\n", + "[nltk_data] Package wordnet is already up-to-date!\n", "Dependencies Imported\n" ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[nltk_data] Downloading package wordnet to /Users/bushra/nltk_data...\n", - "[nltk_data] Package wordnet is already up-to-date!\n" - ] } ], "source": [ @@ -49,6 +174,7 @@ "import pickle\n", "import nltk\n", "import re\n", + "from nltk.corpus import wordnet\n", "\n", "# Download wordnet once (if needed)\n", "try:\n", @@ -62,27 +188,72 @@ }, { "cell_type": "markdown", - "id": "2b7a46c9", - "metadata": {}, + "id": "143dc0e7", + "metadata": { + "papermill": { + "duration": 0.005812, + "end_time": "2025-01-26T12:19:37.308596", + "exception": false, + "start_time": "2025-01-26T12:19:37.302784", + "status": "completed" + }, + "tags": [] + }, "source": [ - "

Load Dataset

" + "
\n", + " \n", + "
\n", + " | Load Dataset\n", + "
\n", + "
\n", + "\n" ] }, { "cell_type": "code", "execution_count": 2, - "id": "046017bb", - "metadata": {}, + "id": "a017ae85", + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-26T12:19:37.322296Z", + "iopub.status.busy": "2025-01-26T12:19:37.321767Z", + "iopub.status.idle": "2025-01-26T12:19:37.528313Z", + "shell.execute_reply": "2025-01-26T12:19:37.527173Z" + }, + "papermill": { + "duration": 0.215862, + "end_time": "2025-01-26T12:19:37.530395", + "exception": false, + "start_time": "2025-01-26T12:19:37.314533", + "status": "completed" + }, + "tags": [] + }, "outputs": [], "source": [ - "data = pd.read_csv(\"coursera.csv\", encoding='utf-8')" + "data = pd.read_csv(\"/kaggle/input/coursera-courses-dataset-2021/Coursera.csv\", encoding='utf-8')" ] }, { "cell_type": "code", "execution_count": 3, - "id": "5fd1ffb0", - "metadata": {}, + "id": "622d24fd", + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-26T12:19:37.544271Z", + "iopub.status.busy": "2025-01-26T12:19:37.543925Z", + "iopub.status.idle": "2025-01-26T12:19:37.569309Z", + "shell.execute_reply": "2025-01-26T12:19:37.568180Z" + }, + "papermill": { + "duration": 0.034287, + "end_time": "2025-01-26T12:19:37.571090", + "exception": false, + "start_time": "2025-01-26T12:19:37.536803", + "status": "completed" + }, + "tags": [] + }, "outputs": [ { "data": { @@ -217,18 +388,47 @@ }, { "cell_type": "markdown", - "id": "c30c8dec", - "metadata": {}, + "id": "823f7f10", + "metadata": { + "papermill": { + "duration": 0.006147, + "end_time": "2025-01-26T12:19:37.583704", + "exception": false, + "start_time": "2025-01-26T12:19:37.577557", + "status": "completed" + }, + "tags": [] + }, "source": [ - "

Basic Data Inspection

" + "
\n", + " \n", + "
\n", + " | Basic Data Inspection\n", + "
\n", + "
\n", + "\n" ] }, { "cell_type": "code", "execution_count": 4, - "id": "5cb80836", + "id": "ef223f00", "metadata": { - "scrolled": true + "execution": { + "iopub.execute_input": "2025-01-26T12:19:37.597479Z", + "iopub.status.busy": "2025-01-26T12:19:37.597100Z", + "iopub.status.idle": "2025-01-26T12:19:37.602718Z", + "shell.execute_reply": "2025-01-26T12:19:37.601887Z" + }, + "papermill": { + "duration": 0.014371, + "end_time": "2025-01-26T12:19:37.604359", + "exception": false, + "start_time": "2025-01-26T12:19:37.589988", + "status": "completed" + }, + "scrolled": true, + "tags": [] }, "outputs": [ { @@ -249,9 +449,23 @@ { "cell_type": "code", "execution_count": 5, - "id": "95cde0c6", + "id": "362643e3", "metadata": { - "scrolled": true + "execution": { + "iopub.execute_input": "2025-01-26T12:19:37.619035Z", + "iopub.status.busy": "2025-01-26T12:19:37.618579Z", + "iopub.status.idle": "2025-01-26T12:19:37.646272Z", + "shell.execute_reply": "2025-01-26T12:19:37.644981Z" + }, + "papermill": { + "duration": 0.037492, + "end_time": "2025-01-26T12:19:37.648403", + "exception": false, + "start_time": "2025-01-26T12:19:37.610911", + "status": "completed" + }, + "scrolled": true, + "tags": [] }, "outputs": [ { @@ -282,8 +496,23 @@ { "cell_type": "code", "execution_count": 6, - "id": "f08d2540", - "metadata": {}, + "id": "5081991f", + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-26T12:19:37.662967Z", + "iopub.status.busy": "2025-01-26T12:19:37.662492Z", + "iopub.status.idle": "2025-01-26T12:19:37.671797Z", + "shell.execute_reply": "2025-01-26T12:19:37.670711Z" + }, + "papermill": { + "duration": 0.018462, + "end_time": "2025-01-26T12:19:37.673595", + "exception": false, + "start_time": "2025-01-26T12:19:37.655133", + "status": "completed" + }, + "tags": [] + }, "outputs": [ { "data": { @@ -310,9 +539,23 @@ { "cell_type": "code", "execution_count": 7, - "id": "01cab0bc", + "id": "405c45b8", "metadata": { - "scrolled": true + "execution": { + 
"iopub.execute_input": "2025-01-26T12:19:37.688503Z", + "iopub.status.busy": "2025-01-26T12:19:37.688132Z", + "iopub.status.idle": "2025-01-26T12:19:37.717601Z", + "shell.execute_reply": "2025-01-26T12:19:37.716576Z" + }, + "papermill": { + "duration": 0.038836, + "end_time": "2025-01-26T12:19:37.719342", + "exception": false, + "start_time": "2025-01-26T12:19:37.680506", + "status": "completed" + }, + "scrolled": true, + "tags": [] }, "outputs": [ { @@ -340,13 +583,28 @@ { "cell_type": "code", "execution_count": 8, - "id": "3b12b05c", - "metadata": {}, + "id": "e0ebb84f", + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-26T12:19:37.734470Z", + "iopub.status.busy": "2025-01-26T12:19:37.734127Z", + "iopub.status.idle": "2025-01-26T12:19:37.755762Z", + "shell.execute_reply": "2025-01-26T12:19:37.754728Z" + }, + "papermill": { + "duration": 0.031348, + "end_time": "2025-01-26T12:19:37.757673", + "exception": false, + "start_time": "2025-01-26T12:19:37.726325", + "status": "completed" + }, + "tags": [] + }, "outputs": [ { "data": { "text/plain": [ - "np.int64(98)" + "98" ] }, "execution_count": 8, @@ -361,8 +619,23 @@ { "cell_type": "code", "execution_count": 9, - "id": "6933f893", - "metadata": {}, + "id": "7c6fe05c", + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-26T12:19:37.772910Z", + "iopub.status.busy": "2025-01-26T12:19:37.772489Z", + "iopub.status.idle": "2025-01-26T12:19:37.794141Z", + "shell.execute_reply": "2025-01-26T12:19:37.793042Z" + }, + "papermill": { + "duration": 0.031347, + "end_time": "2025-01-26T12:19:37.796134", + "exception": false, + "start_time": "2025-01-26T12:19:37.764787", + "status": "completed" + }, + "tags": [] + }, "outputs": [ { "data": { @@ -384,17 +657,89 @@ }, { "cell_type": "markdown", - "id": "b84c5ac7", - "metadata": {}, + "id": "f086c467", + "metadata": { + "papermill": { + "duration": 0.006702, + "end_time": "2025-01-26T12:19:37.810010", + "exception": false, + "start_time": 
"2025-01-26T12:19:37.803308", + "status": "completed" + }, + "tags": [] + }, "source": [ - "

Text Preprocessing on Training Data

" + "
\n", + " \n", + "
\n", + " | Text Preprocessing on Training Data\n", + "
\n", + "
\n", + "\n" ] }, { "cell_type": "code", "execution_count": 10, - "id": "e98bdce3", - "metadata": {}, + "id": "9252309a", + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-26T12:19:37.825185Z", + "iopub.status.busy": "2025-01-26T12:19:37.824776Z", + "iopub.status.idle": "2025-01-26T12:19:38.666444Z", + "shell.execute_reply": "2025-01-26T12:19:38.664460Z" + }, + "papermill": { + "duration": 0.851911, + "end_time": "2025-01-26T12:19:38.668779", + "exception": false, + "start_time": "2025-01-26T12:19:37.816868", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[nltk_data] Downloading package omw-1.4 to /usr/share/nltk_data...\n", + "[nltk_data] Downloading package wordnet to /usr/share/nltk_data...\n", + "[nltk_data] Package wordnet is already up-to-date!\n", + "[nltk_data] Downloading package wordnet2022 to /usr/share/nltk_data...\n", + "[nltk_data] Unzipping corpora/wordnet2022.zip.\n" + ] + } + ], + "source": [ + "import nltk\n", + "nltk.download('omw-1.4')\n", + "nltk.download('wordnet')\n", + "nltk.download('wordnet2022')\n", + "\n", + "! cp -rf /usr/share/nltk_data/corpora/wordnet2022 /usr/share/nltk_data/corpora/wordnet # temp fix for lookup error." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "eb229400", + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-26T12:19:38.685010Z", + "iopub.status.busy": "2025-01-26T12:19:38.684566Z", + "iopub.status.idle": "2025-01-26T12:19:43.006857Z", + "shell.execute_reply": "2025-01-26T12:19:43.005687Z" + }, + "papermill": { + "duration": 4.332617, + "end_time": "2025-01-26T12:19:43.008866", + "exception": false, + "start_time": "2025-01-26T12:19:38.676249", + "status": "completed" + }, + "tags": [] + }, "outputs": [], "source": [ "lemmatizer = WordNetLemmatizer()\n", @@ -423,9 +768,24 @@ }, { "cell_type": "code", - "execution_count": 11, - "id": "d6885a74", - "metadata": {}, + "execution_count": 12, + "id": "0086da9a", + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-26T12:19:43.026076Z", + "iopub.status.busy": "2025-01-26T12:19:43.025599Z", + "iopub.status.idle": "2025-01-26T12:19:43.035485Z", + "shell.execute_reply": "2025-01-26T12:19:43.034318Z" + }, + "papermill": { + "duration": 0.020313, + "end_time": "2025-01-26T12:19:43.037248", + "exception": false, + "start_time": "2025-01-26T12:19:43.016935", + "status": "completed" + }, + "tags": [] + }, "outputs": [ { "data": { @@ -498,7 +858,7 @@ "4 retrieve data using singletable sql query in t... " ] }, - "execution_count": 11, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -509,17 +869,47 @@ }, { "cell_type": "markdown", - "id": "51516e56", - "metadata": {}, + "id": "f1ea0e47", + "metadata": { + "papermill": { + "duration": 0.006812, + "end_time": "2025-01-26T12:19:43.051737", + "exception": false, + "start_time": "2025-01-26T12:19:43.044925", + "status": "completed" + }, + "tags": [] + }, "source": [ - "

Text Vectorization (TF-IDF)

" + "
\n", + " \n", + "
\n", + " | Text Vectorization (TF-IDF)\n", + "
\n", + "
\n", + "\n" ] }, { "cell_type": "code", - "execution_count": 12, - "id": "aab37f89", - "metadata": {}, + "execution_count": 13, + "id": "8b8a0dd0", + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-26T12:19:43.067820Z", + "iopub.status.busy": "2025-01-26T12:19:43.067451Z", + "iopub.status.idle": "2025-01-26T12:19:43.622174Z", + "shell.execute_reply": "2025-01-26T12:19:43.620751Z" + }, + "papermill": { + "duration": 0.565277, + "end_time": "2025-01-26T12:19:43.624203", + "exception": false, + "start_time": "2025-01-26T12:19:43.058926", + "status": "completed" + }, + "tags": [] + }, "outputs": [ { "name": "stdout", @@ -537,17 +927,47 @@ }, { "cell_type": "markdown", - "id": "42cdb85d", - "metadata": {}, + "id": "896a8be2", + "metadata": { + "papermill": { + "duration": 0.007039, + "end_time": "2025-01-26T12:19:43.638989", + "exception": false, + "start_time": "2025-01-26T12:19:43.631950", + "status": "completed" + }, + "tags": [] + }, "source": [ - "

Apply SVD on TF-IDF

" + "
\n", + " \n", + "
\n", + " | Apply SVD on TF-IDF\n", + "
\n", + "
\n", + "\n" ] }, { "cell_type": "code", - "execution_count": 13, - "id": "493e2f7d", - "metadata": {}, + "execution_count": 14, + "id": "36956ac5", + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-26T12:19:43.654506Z", + "iopub.status.busy": "2025-01-26T12:19:43.654170Z", + "iopub.status.idle": "2025-01-26T12:19:44.185602Z", + "shell.execute_reply": "2025-01-26T12:19:44.184556Z" + }, + "papermill": { + "duration": 0.541124, + "end_time": "2025-01-26T12:19:44.187256", + "exception": false, + "start_time": "2025-01-26T12:19:43.646132", + "status": "completed" + }, + "tags": [] + }, "outputs": [ { "name": "stdout", @@ -567,25 +987,54 @@ }, { "cell_type": "markdown", - "id": "62f43488", - "metadata": {}, + "id": "3c0a53d7", + "metadata": { + "papermill": { + "duration": 0.006881, + "end_time": "2025-01-26T12:19:44.201597", + "exception": false, + "start_time": "2025-01-26T12:19:44.194716", + "status": "completed" + }, + "tags": [] + }, "source": [ - "

Cosine Similarity and Recommendations

" + "
\n", + " \n", + "
\n", + " | Cosine Similarity and Recommendations\n", + "
\n", + "
\n", + "\n" ] }, { "cell_type": "code", - "execution_count": 14, - "id": "ab0769d7", + "execution_count": 15, + "id": "ae0ef77e", "metadata": { - "scrolled": true + "execution": { + "iopub.execute_input": "2025-01-26T12:19:44.217431Z", + "iopub.status.busy": "2025-01-26T12:19:44.217095Z", + "iopub.status.idle": "2025-01-26T12:19:44.299935Z", + "shell.execute_reply": "2025-01-26T12:19:44.298718Z" + }, + "papermill": { + "duration": 0.093083, + "end_time": "2025-01-26T12:19:44.301865", + "exception": false, + "start_time": "2025-01-26T12:19:44.208782", + "status": "completed" + }, + "scrolled": true, + "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "0.023688637916059643\n" + "0.02311163293204528\n" ] } ], @@ -596,17 +1045,47 @@ }, { "cell_type": "markdown", - "id": "62948f78", - "metadata": {}, + "id": "431dea86", + "metadata": { + "papermill": { + "duration": 0.007233, + "end_time": "2025-01-26T12:19:44.316691", + "exception": false, + "start_time": "2025-01-26T12:19:44.309458", + "status": "completed" + }, + "tags": [] + }, "source": [ - "

Functions for Recommendation

" + "
\n", + " \n", + "
\n", + " | Functions for Recommendation\n", + "
\n", + "
\n", + "\n" ] }, { "cell_type": "code", - "execution_count": 15, - "id": "9333fc14", - "metadata": {}, + "execution_count": 16, + "id": "c1777e5f", + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-26T12:19:44.332918Z", + "iopub.status.busy": "2025-01-26T12:19:44.332520Z", + "iopub.status.idle": "2025-01-26T12:19:44.337473Z", + "shell.execute_reply": "2025-01-26T12:19:44.336419Z" + }, + "papermill": { + "duration": 0.014922, + "end_time": "2025-01-26T12:19:44.339075", + "exception": false, + "start_time": "2025-01-26T12:19:44.324153", + "status": "completed" + }, + "tags": [] + }, "outputs": [], "source": [ "def normalize_rating(rating_str):\n", @@ -622,11 +1101,26 @@ { "cell_type": "code", "execution_count": 17, - "id": "a990f12f", - "metadata": {}, + "id": "00067d2e", + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-26T12:19:44.355243Z", + "iopub.status.busy": "2025-01-26T12:19:44.354893Z", + "iopub.status.idle": "2025-01-26T12:19:44.362354Z", + "shell.execute_reply": "2025-01-26T12:19:44.361083Z" + }, + "papermill": { + "duration": 0.01768, + "end_time": "2025-01-26T12:19:44.364169", + "exception": false, + "start_time": "2025-01-26T12:19:44.346489", + "status": "completed" + }, + "tags": [] + }, "outputs": [], "source": [ - "def get_recommendations(course_name, data, similarity_matrix, top_n=3, threshold=90, rating_weight=0.05):\n", + "def get_recommendations(course_name, data, similarity_matrix, top_n=3, rating_weight=0.05):\n", " \"\"\"\n", " Get top N course recommendations based on similarity to the given course name.\n", " \"\"\"\n", @@ -656,8 +1150,23 @@ { "cell_type": "code", "execution_count": 18, - "id": "201f8d31", - "metadata": {}, + "id": "708f7a40", + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-26T12:19:44.380487Z", + "iopub.status.busy": "2025-01-26T12:19:44.380135Z", + "iopub.status.idle": "2025-01-26T12:19:44.390490Z", + "shell.execute_reply": "2025-01-26T12:19:44.389460Z" + }, + 
"papermill": { + "duration": 0.020248, + "end_time": "2025-01-26T12:19:44.392025", + "exception": false, + "start_time": "2025-01-26T12:19:44.371777", + "status": "completed" + }, + "tags": [] + }, "outputs": [ { "data": { @@ -667,22 +1176,22 @@ " 'rating': '4.8',\n", " 'institution': 'IESE Business School',\n", " 'difficulty_level': 'Intermediate',\n", - " 'similarity': np.float64(1.0),\n", - " 'final_score': np.float64(0.998)},\n", + " 'similarity': 1.0000000000000004,\n", + " 'final_score': 0.9980000000000004},\n", " {'course_name': 'Finance for Non-Financial Professionals',\n", " 'course_url': 'https://www.coursera.org/learn/finance-for-non-finance-managers',\n", " 'rating': '4.5',\n", " 'institution': 'University of California, Irvine',\n", " 'difficulty_level': 'Conversant',\n", - " 'similarity': np.float64(0.832940692838965),\n", - " 'final_score': np.float64(0.8362936581970167)},\n", + " 'similarity': 0.833306939311862,\n", + " 'final_score': 0.8366415923462689},\n", " {'course_name': 'Finance for Non-Financial Managers',\n", " 'course_url': 'https://www.coursera.org/learn/finance-for-non-financial-managers',\n", " 'rating': '4.2',\n", " 'institution': 'Emory University',\n", " 'difficulty_level': 'Beginner',\n", - " 'similarity': np.float64(0.8315387472926317),\n", - " 'final_score': np.float64(0.8319618099280001)}]" + " 'similarity': 0.8315951404609806,\n", + " 'final_score': 0.8320153834379316}]" ] }, "execution_count": 18, @@ -696,28 +1205,97 @@ }, { "cell_type": "markdown", - "id": "ed13d0b8", - "metadata": {}, + "id": "381e039a", + "metadata": { + "papermill": { + "duration": 0.007172, + "end_time": "2025-01-26T12:19:44.406982", + "exception": false, + "start_time": "2025-01-26T12:19:44.399810", + "status": "completed" + }, + "tags": [] + }, "source": [ - "

Save the Model

" + "
\n", + " \n", + "
\n", + " | Save the Model\n", + "
\n", + "
\n", + "\n" ] }, { "cell_type": "code", "execution_count": 19, - "id": "8c3a275a", - "metadata": {}, + "id": "b85745ff", + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-26T12:19:44.423651Z", + "iopub.status.busy": "2025-01-26T12:19:44.423279Z", + "iopub.status.idle": "2025-01-26T12:19:44.607209Z", + "shell.execute_reply": "2025-01-26T12:19:44.606006Z" + }, + "papermill": { + "duration": 0.194881, + "end_time": "2025-01-26T12:19:44.609248", + "exception": false, + "start_time": "2025-01-26T12:19:44.414367", + "status": "completed" + }, + "tags": [] + }, "outputs": [], "source": [ "pickle.dump(similarity_matrix, open('similarity_matrix.pkl', 'wb'))" ] + }, + { + "cell_type": "markdown", + "id": "3446943a", + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-25T20:33:04.637733Z", + "iopub.status.busy": "2025-01-25T20:33:04.637380Z", + "iopub.status.idle": "2025-01-25T20:33:04.644873Z", + "shell.execute_reply": "2025-01-25T20:33:04.643392Z", + "shell.execute_reply.started": "2025-01-25T20:33:04.637708Z" + }, + "papermill": { + "duration": 0.0073, + "end_time": "2025-01-26T12:19:44.624451", + "exception": false, + "start_time": "2025-01-26T12:19:44.617151", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "

Thank You for Your Attention! 😊

Please give a 👍 if you liked it!

" + ] } ], "metadata": { + "kaggle": { + "accelerator": "none", + "dataSources": [ + { + "datasetId": 1864990, + "sourceId": 3045510, + "sourceType": "datasetVersion" + } + ], + "dockerImageVersionId": 30839, + "isGpuEnabled": false, + "isInternetEnabled": true, + "language": "python", + "sourceType": "notebook" + }, "kernelspec": { - "display_name": "LearnStream", + "display_name": "Python 3", "language": "python", - "name": "learnstream" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -729,7 +1307,19 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.7" + "version": "3.10.12" + }, + "papermill": { + "default_parameters": {}, + "duration": 14.746271, + "end_time": "2025-01-26T12:19:45.653814", + "environment_variables": {}, + "exception": null, + "input_path": "__notebook__.ipynb", + "output_path": "__notebook__.ipynb", + "parameters": {}, + "start_time": "2025-01-26T12:19:30.907543", + "version": "2.6.0" } }, "nbformat": 4,