Skip to content
This repository was archived by the owner on May 10, 2023. It is now read-only.

Commit 666968b

Browse files
committed
Adding preprocessing step
1 parent fd26a27 commit 666968b

File tree

2 files changed

+59
-0
lines changed

2 files changed

+59
-0
lines changed

Part 1 - Data Preprocessing/Data.csv

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
Country,Age,Salary,Purchased
2+
France,44,72000,No
3+
Spain,27,48000,Yes
4+
Germany,30,54000,No
5+
Spain,38,61000,No
6+
Germany,40,,Yes
7+
France,35,58000,Yes
8+
Spain,,52000,No
9+
France,48,79000,Yes
10+
Germany,50,83000,No
11+
France,37,67000,Yes
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# -*- coding: utf-8 -*-
2+
"""
3+
Created on Mon Jul 16 23:11:13 2018
4+
5+
@author: Mohammad Doosti Lakhani
6+
"""
7+
8+
# importing libraries
9+
import numpy as np
10+
import matplotlib.pyplot as plt
11+
import pandas as pd
12+
13+
# importing dataset
dataset = pd.read_csv('Data.csv')
# Features: every column except the last one.
x = dataset.iloc[:, :-1].values
# Target: the last column, selected relative to the end so it stays in step
# with the feature slice above instead of hard-coding column index 3.
y = dataset.iloc[:, -1].values
17+
18+
# resolving missing data
# NOTE: sklearn.preprocessing.Imputer was deprecated in scikit-learn 0.20 and
# removed in 0.22. SimpleImputer is its replacement; it always imputes per
# column, which is what the old `axis=0` argument requested.
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
# Columns 1 and 2 of x (Age and Salary in Data.csv) are the numeric features;
# each missing entry is replaced by the mean of its column.
imputer = imputer.fit(x[:, 1:3])
x[:, 1:3] = imputer.transform(x[:, 1:3])
23+
24+
# encoding categorical data types with LabelEncoder and OneHotEncoder
# NOTE: the `categorical_features` argument of OneHotEncoder was removed in
# scikit-learn 0.22; selecting the column to encode is now the job of
# ColumnTransformer.
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Replace the strings in column 0 of x (Country in Data.csv) with integer codes.
labelencoder_x = LabelEncoder()
labelencoder_x = labelencoder_x.fit(x[:, 0])
x[:, 0] = labelencoder_x.transform(x[:, 0])

# Replace the target labels in y with integer codes.
labelencoder_y = LabelEncoder()
labelencoder_y = labelencoder_y.fit(y)
y = labelencoder_y.transform(y)

# One-hot encode column 0 and pass the remaining columns through unchanged.
# The encoded columns come first and the untouched columns follow, matching
# the layout the old `categorical_features=[0]` call produced.
# sparse_threshold=0 forces a dense result (the original called .toarray()).
onehotencoder_x = ColumnTransformer(
    [('country', OneHotEncoder(), [0])],
    remainder='passthrough',
    sparse_threshold=0,
)
onehotencoder_x = onehotencoder_x.fit(x)
# x is an object array at this point; cast so the result is a float matrix,
# as the original encoder returned.
x = np.asarray(onehotencoder_x.transform(x), dtype=float)
37+
38+
# splitting dataset into Train set and Test set
from sklearn.model_selection import train_test_split

# 80% of the rows go to the training set; random_state pins the shuffle so
# the split is reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    train_size=0.8,
    random_state=0,
)
41+
42+
# feature scaling
from sklearn.preprocessing import StandardScaler

standardscaler_x = StandardScaler()
# Learn the scaling statistics from the training rows only, then apply the
# same fitted scaler to the test rows.
x_train = standardscaler_x.fit_transform(x_train)
x_test = standardscaler_x.transform(x_test)
48+

0 commit comments

Comments
 (0)