Skip to content

Commit b72a85a

Browse files
committed
add garbage_classification
1 parent ad77dd5 commit b72a85a

File tree

10 files changed

+359
-6
lines changed

10 files changed

+359
-6
lines changed

ComputerVision/.gitkeep

Whitespace-only changes.

DataScience/.gitkeep

Whitespace-only changes.

Homework/qiyue

-1
This file was deleted.

NLP/.gitkeep

Whitespace-only changes.

PythonExercise/ComputerVision/.gitkeep

Whitespace-only changes.

PythonExercise/DataScience/.gitkeep

Whitespace-only changes.

PythonExercise/DataScience/garbage_classification/.gitkeep

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,348 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"# Image Classification (CNN - keras)"
8+
]
9+
},
10+
{
11+
"cell_type": "code",
12+
"execution_count": 1,
13+
"metadata": {},
14+
"outputs": [],
15+
"source": [
16+
"# try the process of implementing CNN with Keras to classify images \n",
17+
"# 1. import useful packages \n",
18+
"# 2. load the data before visualize and preprocess it \n",
19+
"# 3. try a simple CNN model and then evaluate its performance \n",
20+
"# 4. use techniques such as data augmentation, learning rate decay and dropout to increase our model's accuracy \n"
21+
]
22+
},
23+
{
24+
"cell_type": "markdown",
25+
"metadata": {},
26+
"source": [
27+
"## with applications to Garbage Sorting "
28+
]
29+
},
30+
{
31+
"cell_type": "markdown",
32+
"metadata": {},
33+
"source": [
34+
"### import packages "
35+
]
36+
},
37+
{
38+
"cell_type": "code",
39+
"execution_count": 2,
40+
"metadata": {},
41+
"outputs": [],
42+
"source": [
43+
"import numpy as np # linear algebra \n",
44+
"import pandas as pd # data processing \n",
45+
"import os "
46+
]
47+
},
48+
{
49+
"cell_type": "code",
50+
"execution_count": null,
51+
"metadata": {},
52+
"outputs": [],
53+
"source": [
54+
"\n"
55+
]
56+
},
57+
{
58+
"cell_type": "code",
59+
"execution_count": 33,
60+
"metadata": {},
61+
"outputs": [],
62+
"source": [
63+
"import matplotlib.pyplot as plt\n",
64+
"import cv2 # image processing package\n",
65+
"# keras for model\n",
66+
"import keras\n",
67+
"from keras.layers import Conv2D, MaxPool2D, Dropout, Flatten, Dense\n",
68+
"from keras.models import Sequential\n",
69+
"from sklearn.utils import shuffle\n",
70+
"import random\n"
71+
]
72+
},
73+
{
74+
"cell_type": "code",
75+
"execution_count": 34,
76+
"metadata": {},
77+
"outputs": [],
78+
"source": [
79+
"# this is where all the images are stored\n",
80+
"# users need to change the file path here \n",
81+
"# under this file path, we have four folders, each will have a category for garbage sorting \n",
82+
"# put images under corresponding folder \n",
83+
"train_dir = \"../yzheng070/Desktop/seg_train\"\n"
84+
]
85+
},
86+
{
87+
"cell_type": "code",
88+
"execution_count": null,
89+
"metadata": {},
90+
"outputs": [],
91+
"source": [
92+
"# define how many classes we have \n",
93+
"classes = ['dry', 'wet', 'hazardous', 'recycle']\n",
94+
"len(classes)\n"
95+
]
96+
},
97+
{
98+
"cell_type": "markdown",
99+
"metadata": {},
100+
"source": [
101+
"### read image and visualize some here \n"
102+
]
103+
},
104+
{
105+
"cell_type": "code",
106+
"execution_count": null,
107+
"metadata": {},
108+
"outputs": [],
109+
"source": [
110+
"# read one randomly chosen image per class and visualize it \n",
111+
"one_from_each = []\n",
112+
"titles = []\n",
113+
"# keep only real class folders (skips stray files such as .DS_Store); sorted so label order is reproducible\n",
114+
"classes = sorted(d for d in os.listdir(train_dir) if os.path.isdir(train_dir + '/' + d))\n",
115+
"n_classes = len(classes)\n",
116+
"for x in classes:\n",
117+
"    unique_img_dir = train_dir + '/' + x\n",
118+
"    temp_directory = os.listdir(unique_img_dir)\n",
119+
"    temp_img = unique_img_dir + '/' + random.choice(temp_directory)  # any file, never out of range\n",
120+
"    image = cv2.imread(temp_img)\n",
121+
"    image = np.array(image).astype('float32')/255.0  # scale pixel values to [0, 1]\n",
122+
"    one_from_each.append(image)\n",
123+
"    titles.append(x)\n",
124+
"\n",
125+
"# loop over what was actually loaded rather than a hard-coded count (range(5) overran a four-class dataset)\n",
126+
"for imageshow, title in zip(one_from_each, titles):\n",
127+
"    plt.imshow(imageshow[:,:,::-1])  # cv2 loads BGR; reversing the channel axis gives RGB for matplotlib\n",
128+
"    plt.title(title)\n",
129+
"    plt.show()"
130+
]
131+
},
132+
{
133+
"cell_type": "code",
134+
"execution_count": null,
135+
"metadata": {},
136+
"outputs": [],
137+
"source": [
138+
"# define labels \n",
139+
"print(classes)\n",
140+
"# Map integer label -> class folder name, one entry per element of `classes`.\n",
141+
"# Building it with enumerate keeps the mapping in step with the class list:\n",
142+
"# the hand-written 0..3 dict raised IndexError with fewer than four classes\n",
143+
"# and silently left out any class beyond the fourth.\n",
144+
"labels_dict = dict(enumerate(classes))"
145+
]
146+
},
147+
{
148+
"cell_type": "markdown",
149+
"metadata": {},
150+
"source": [
151+
"### load data "
152+
]
153+
},
154+
{
155+
"cell_type": "code",
156+
"execution_count": null,
157+
"metadata": {},
158+
"outputs": [],
159+
"source": [
160+
"# define a load data function \n",
161+
"# to process it for modeling \n",
162+
"\n",
163+
"def load_data(directory):\n",
164+
"    \"\"\"Load the images under directory/<class folder>/ into model-ready arrays.\n",
165+
"\n",
166+
"    Every readable image is resized to 150x150 and paired with the integer label that\n",
167+
"    labels_dict assigns to its folder. Returns (images, labels): images as a float32\n",
168+
"    array scaled to [0, 1], labels one-hot encoded over n_classes, shuffled in unison.\n",
169+
"    \"\"\"\n",
170+
"    size = 150,150\n",
171+
"    images, labels = [], []\n",
172+
"    # invert labels_dict so a folder name maps straight to its integer label\n",
173+
"    label_of = {name: idx for idx, name in labels_dict.items()}\n",
174+
"\n",
175+
"    for folder in os.listdir(directory):\n",
176+
"        folder_path = directory + \"/\" + folder\n",
177+
"        # skip stray files (e.g. .DS_Store) and folders that are not a known class\n",
178+
"        if folder not in label_of or not os.path.isdir(folder_path):\n",
179+
"            continue\n",
180+
"        print(\"Loading images from : \",folder, \": \", end=\"\")\n",
181+
"        for file in os.listdir(folder_path):\n",
182+
"            curr_img = cv2.imread(folder_path + \"/\" + file)\n",
183+
"            if curr_img is None:  # cv2.imread returns None when a file cannot be decoded\n",
184+
"                continue\n",
185+
"            images.append(cv2.resize(curr_img, size))\n",
186+
"            labels.append(label_of[folder])\n",
187+
"        print(\"completed\")\n",
188+
"\n",
189+
"    images, labels = shuffle(images, labels)\n",
190+
"    images = np.array(images)\n",
191+
"    images = images.astype('float32')/255.0\n",
192+
"    labels = keras.utils.to_categorical(np.array(labels), n_classes)\n",
193+
"\n",
194+
"    return images, labels"
195+
]
196+
},
197+
{
198+
"cell_type": "code",
199+
"execution_count": null,
200+
"metadata": {},
201+
"outputs": [],
202+
"source": [
203+
"X_train, Y_train = load_data(train_dir)"
204+
]
205+
},
206+
{
207+
"cell_type": "markdown",
208+
"metadata": {},
209+
"source": [
210+
"### CNN Model"
211+
]
212+
},
213+
{
214+
"cell_type": "code",
215+
"execution_count": null,
216+
"metadata": {},
217+
"outputs": [],
218+
"source": [
219+
"# modeling \n",
220+
"# using CNN \n",
221+
"# Convolutional Neural Network \n",
222+
"\n",
223+
"model = Sequential()\n",
224+
"\n",
225+
"model.add(Conv2D(32, kernel_size =[5,5], strides = 2, activation = 'relu', input_shape = (150,150,3)))\n",
226+
"model.add(MaxPool2D(pool_size = [2,2], strides = 2))\n",
227+
"model.add(Conv2D(64, kernel_size = [3,3], padding = 'same', activation = \"relu\"))\n",
228+
"model.add(Conv2D(64, kernel_size = [3,3], padding = 'same', activation = \"relu\"))\n",
229+
"model.add(MaxPool2D(pool_size = [2,2], strides = 2))\n",
230+
"model.add(Conv2D(128, kernel_size = [3,3], activation = \"relu\"))\n",
231+
"model.add(Conv2D(128, kernel_size = [3,3], activation = \"relu\"))\n",
232+
"model.add(MaxPool2D(pool_size = [2,2], strides = 2))\n",
233+
"model.add(Conv2D(256, kernel_size = [3,3], activation = \"relu\"))\n",
234+
"model.add(Dropout(0.5))\n",
235+
"model.add(Flatten())\n",
236+
"model.add(Dense(512, activation = 'relu'))\n",
237+
"model.add(Dense(n_classes, activation = 'softmax'))\n",
238+
"\n",
239+
"model.summary()"
240+
]
241+
},
242+
{
243+
"cell_type": "markdown",
244+
"metadata": {},
245+
"source": [
246+
"### Validation"
247+
]
248+
},
249+
{
250+
"cell_type": "code",
251+
"execution_count": null,
252+
"metadata": {},
253+
"outputs": [],
254+
"source": [
255+
"# model validation\n",
256+
"model.compile(loss = \"categorical_crossentropy\", optimizer = \"adam\", metrics = [\"accuracy\"])\n",
257+
"model_hist = model.fit(X_train, Y_train, epochs = 10, validation_split = 0.1, batch_size = 32)"
258+
]
259+
},
260+
{
261+
"cell_type": "code",
262+
"execution_count": null,
263+
"metadata": {},
264+
"outputs": [],
265+
"source": []
266+
},
267+
{
268+
"cell_type": "code",
269+
"execution_count": null,
270+
"metadata": {},
271+
"outputs": [],
272+
"source": [
273+
"# plot the accuracy (older Keras stores the metric under 'acc', Keras 2.3+ under 'accuracy') \n",
274+
"acc_key = 'acc' if 'acc' in model_hist.history else 'accuracy'\n",
275+
"plt.plot(model_hist.history[acc_key])\n",
276+
"plt.plot(model_hist.history['val_' + acc_key])\n",
277+
"plt.title(\"training vs Validation accuracy\")\n",
278+
"plt.legend(['train acc.','validation acc.'], loc = 'lower right')\n",
279+
"plt.xlabel(\"Epoch\")\n",
280+
"plt.ylabel(\"Accuracy\")\n",
281+
"plt.show()\n",
282+
"plt.plot(model_hist.history['loss'])\n",
283+
"plt.plot(model_hist.history['val_loss'])\n",
284+
"plt.title(\"Loss plot (train vs validation)\")\n",
285+
"plt.legend(['training loss','validation loss'], loc = 'upper right')\n",
286+
"plt.xlabel(\"Epoch\")\n",
287+
"plt.ylabel(\"Loss\")\n",
288+
"plt.show()\n"
289+
]
290+
},
291+
{
292+
"cell_type": "markdown",
293+
"metadata": {},
294+
"source": [
295+
"### Testing "
296+
]
297+
},
298+
{
299+
"cell_type": "code",
300+
"execution_count": null,
301+
"metadata": {},
302+
"outputs": [],
303+
"source": [
304+
"# test data\n",
305+
"# put model on test data to check results \n",
306+
"\n",
307+
"# get test data \n",
308+
"# users need to change to your path \n",
309+
"test_dir = \"../yzheng070/Desktop/seg_test\"\n",
310+
"\n",
311+
"X_test, Y_test = load_data(test_dir)"
312+
]
313+
},
314+
{
315+
"cell_type": "code",
316+
"execution_count": null,
317+
"metadata": {},
318+
"outputs": [],
319+
"source": [
320+
"# get accuracy on test data \n",
321+
"metrics = model.evaluate(X_test, Y_test)\n",
322+
"print(\"Model metrics = \",model.metrics_names)\n",
323+
"print(\"Testing Accuracy = \", metrics[1])"
324+
]
325+
}
326+
],
327+
"metadata": {
328+
"kernelspec": {
329+
"display_name": "Python 3",
330+
"language": "python",
331+
"name": "python3"
332+
},
333+
"language_info": {
334+
"codemirror_mode": {
335+
"name": "ipython",
336+
"version": 3
337+
},
338+
"file_extension": ".py",
339+
"mimetype": "text/x-python",
340+
"name": "python",
341+
"nbconvert_exporter": "python",
342+
"pygments_lexer": "ipython3",
343+
"version": "3.7.3"
344+
}
345+
},
346+
"nbformat": 4,
347+
"nbformat_minor": 2
348+
}

PythonExercise/NLP/.gitkeep

Whitespace-only changes.

README.md

+11-5
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# You-are-Pythonista
2-
汇聚【从零单排】【实战项目】【Python面试题】【大航海】【Python应用】【Python错题集】【技术沙龙】【内推渠道】等等
2+
汇聚【从零单排】【实战项目】【数据科学】【自然语言处理】【计算机视觉】【面试题系列】【大航海】【Python应用】【错题集】【技术沙龙】【内推渠道】等等
33

44
【人人都是Pythonista】由公众号【Python专栏】推出,请认准唯一标识:
55

@@ -10,12 +10,18 @@
1010
请仔细阅读本文档,尤其是**使用说明**
1111

1212
# 目录说明
13-
- LearnFromZero: 从零学习Python
14-
- PythonExercise: Python练习、应用
15-
- PracticeProject: 实战项目
13+
- ComputerVision: 计算机视觉
14+
- DataScience: 数据科学
15+
- Homework: 所有的作业都提交在这个目录下,每个人创建属于自己的独立目录
16+
- HR: 内推渠道
17+
- KnowledgeShare: 干货分享
18+
- LearnFromZero: 从零单排
19+
- NLP: Natural Language Processing, 自然语言处理
1620
- OnePiece: 大航海
21+
- PracticeProject: 实战项目
22+
- PythonExercise: Python练习、应用
23+
- Resource: 资源目录
1724
- TechSalon: 技术沙龙
18-
- HR: 内推渠道
1925

2026
# 使用说明
2127
## 命名规范

0 commit comments

Comments
 (0)