load_data.py: fix typo

taspinar · taspinar · commit 98d77cf44c78 · 2018-04-14T14:38:09.000+02:00
diff --git a/load_data.py b/load_data.py
@@ -1,96 +1,96 @@
-from mnist import MNIST
-from utils import *
-from six.moves.urllib.request import urlopen
-import gzip, tarfile
-from shutil import move
-
-try:
-    from StringIO import StringIO
-except ImportError:
-    from io import StringIO
-
-SOURCE_URL_MNIST = 'http://yann.lecun.com/exdb/mnist/'
-SOURCE_URL_CIFAR10 = 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'
-SOURCE_URL_OXFLOWER17 = 'http://www.robots.ox.ac.uk/~vgg/data/flowers/17/17flowers.tgz'
-
-MNIST_FILES = ['train-images-idx3-ubyte.gz', 'train-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz', 't10k-labels-idx1-ubyte.gz']
-
-CIFAR10_TRAIN_DATASETS = ['data_batch_1', 'data_batch_2', 'data_batch_3', 'data_batch_4', 'data_batch_5', ]
-CIFAR10_TEST_DATASETS = ['test_batch']
-CIFAR_10_GZ_FILE  = 'cifar-10-python.tar.gz'
-CIFAR_10_FOLDER = 'cifar-10-batches-py/'
-
-def unzip_download(download_response):
-    compressedFile = StringIO()
-    compressedFile.write(download_response.read())
-    compressedFile.seek(0)
-    decompressedFile = gzip.GzipFile(fileobj=compressedFile, mode='rb')
-    return decompressedFile
-
-def mnist(input_folder, image_width, image_height, image_depth):
-    if not os.path.exists(input_folder):
-        os.mkdir(input_folder)
-    
-    for filename in MNIST_FILES:
-        unzipped_filename = filename.split('.')[0]
-        if unzipped_filename not in os.listdir(input_folder):
-            print('Downloading MNIST file ', filename)
-            response = urlopen(SOURCE_URL_MNIST + filename)
-            with open(input_folder + unzipped_filename, 'wb') as outfile:
-                outfile.write(gzip.decompress(response.read()))
-            print('Succesfully downloaded and unzipped', filename)
-    print("Loading MNIST dataset...")
-    mndata = MNIST(input_folder)
-    train_dataset_, train_labels_ = mndata.load_training()
-    test_dataset_, test_labels_ = mndata.load_testing()
-    train_dataset, train_labels = reformat_data(train_dataset_, train_labels_, image_width, image_height, image_depth)
-    test_dataset, test_labels = reformat_data(test_dataset_, test_labels_, image_width, image_height, image_depth)
-    print("The MNIST training dataset contains {} images, each of size {}".format(train_dataset[:,:,:,:].shape[0], train_dataset[:,:,:,:].shape[1:]))
-    print("The MNIST test dataset contains {} images, each of size {}".format(test_dataset[:,:,:,:].shape[0], test_dataset[:,:,:,:].shape[1:]))
-    print("There are {} number of labels.".format(len(np.unique(train_labels_))))
+from mnist import MNIST
+from utils import *
+from six.moves.urllib.request import urlopen
+import gzip, tarfile
+from shutil import move
+
+try:
+    from StringIO import StringIO
+except ImportError:
+    from io import StringIO
+
+SOURCE_URL_MNIST = 'http://yann.lecun.com/exdb/mnist/'
+SOURCE_URL_CIFAR10 = 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'
+SOURCE_URL_OXFLOWER17 = 'http://www.robots.ox.ac.uk/~vgg/data/flowers/17/17flowers.tgz'
+
+MNIST_FILES = ['train-images-idx3-ubyte.gz', 'train-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz', 't10k-labels-idx1-ubyte.gz']
+
+CIFAR10_TRAIN_DATASETS = ['data_batch_1', 'data_batch_2', 'data_batch_3', 'data_batch_4', 'data_batch_5', ]
+CIFAR10_TEST_DATASETS = ['test_batch']
+CIFAR_10_GZ_FILE  = 'cifar-10-python.tar.gz'
+CIFAR_10_FOLDER = 'cifar-10-batches-py/'
+
+def unzip_download(download_response):
+    compressedFile = StringIO()
+    compressedFile.write(download_response.read())
+    compressedFile.seek(0)
+    decompressedFile = gzip.GzipFile(fileobj=compressedFile, mode='rb')
+    return decompressedFile
+
+def mnist(input_folder, image_width, image_height, image_depth):
+    if not os.path.exists(input_folder):
+        os.makedirs(input_folder)
+    
+    for filename in MNIST_FILES:
+        unzipped_filename = filename.split('.')[0]
+        if unzipped_filename not in os.listdir(input_folder):
+            print('Downloading MNIST file ', filename)
+            response = urlopen(SOURCE_URL_MNIST + filename)
+            with open(input_folder + unzipped_filename, 'wb') as outfile:
+                outfile.write(gzip.decompress(response.read()))
+            print('Succesfully downloaded and unzipped', filename)
+    print("Loading MNIST dataset...")
+    mndata = MNIST(input_folder)
+    train_dataset_, train_labels_ = mndata.load_training()
+    test_dataset_, test_labels_ = mndata.load_testing()
+    train_dataset, train_labels = reformat_data(train_dataset_, train_labels_, image_width, image_height, image_depth)
+    test_dataset, test_labels = reformat_data(test_dataset_, test_labels_, image_width, image_height, image_depth)
+    print("The MNIST training dataset contains {} images, each of size {}".format(train_dataset[:,:,:,:].shape[0], train_dataset[:,:,:,:].shape[1:]))
+    print("The MNIST test dataset contains {} images, each of size {}".format(test_dataset[:,:,:,:].shape[0], test_dataset[:,:,:,:].shape[1:]))
+    print("There are {} number of labels.".format(len(np.unique(train_labels_))))
+    return train_dataset, train_labels, test_dataset, test_labels
+
+def cifar10(input_folder, image_width, image_height, image_depth):
+    if not os.path.exists(input_folder):
+        os.mkdir(input_folder)
+    
+    download_flag = False
+    for file in [CIFAR_10_GZ_FILE] + CIFAR10_TRAIN_DATASETS + CIFAR10_TEST_DATASETS:
+        if file not in os.listdir(input_folder):
+            download_flag = True
+            
+    if download_flag:
+        print("Downloading CIFAR10 dataset")
+        response = urlopen(SOURCE_URL_CIFAR10)
+        with open(input_folder + CIFAR_10_GZ_FILE, 'wb') as outfile:
+            outfile.write(response.read())
+        print('Succesfully downloaded and unzipped', CIFAR_10_GZ_FILE)
+        print("extracting files...")
+        tar = tarfile.open(input_folder + CIFAR_10_GZ_FILE)
+        tar.extractall(input_folder)
+        files = os.listdir(input_folder + CIFAR_10_FOLDER)
+        for file in files:
+            move(input_folder + CIFAR_10_FOLDER + file, input_folder + file)
+        os.rmdir(input_folder + CIFAR_10_FOLDER)
+    print("Loading Cifar-10 dataset")
+    with open(input_folder + CIFAR10_TEST_DATASETS[0], 'rb') as f0:
+        c10_test_dict = pickle.load(f0, encoding='bytes')
+
+    c10_test_dataset, c10_test_labels = c10_test_dict[b'data'], c10_test_dict[b'labels']
+    
+    c10_train_dataset, c10_train_labels = [], []
+    for train_dataset in CIFAR10_TRAIN_DATASETS:
+        with open(input_folder + train_dataset, 'rb') as f0:
+            c10_train_dict = pickle.load(f0, encoding='bytes')
+            c10_train_dataset_, c10_train_labels_ = c10_train_dict[b'data'], c10_train_dict[b'labels']
+            
+            c10_train_dataset.append(c10_train_dataset_)
+            c10_train_labels += c10_train_labels_
+
+    c10_train_dataset = np.concatenate(c10_train_dataset, axis=0)
+    test_dataset, test_labels = reformat_data(c10_test_dataset, c10_test_labels, image_width, image_height, image_depth)
+    train_dataset, train_labels = reformat_data(c10_train_dataset, c10_train_labels, image_width, image_height, image_depth)
+    print("The CIFAR-10 training dataset contains {} images, each of size {}".format(train_dataset[:,:,:,:].shape[0], train_dataset[:,:,:,:].shape[1:]))
+    print("The CIFAR-10 test dataset contains {} images, each of size {}".format(test_dataset[:,:,:,:].shape[0], test_dataset[:,:,:,:].shape[1:]))
+    print("There are {} number of labels.".format(len(np.unique(c10_train_labels))))
     return train_dataset, train_labels, test_dataset, test_labels
-
-def cifar10(input_folder, image_width, image_height, image_depth):
-    if not os.path.exists(input_folder):
-        os.mkdir(input_folder)
-    
-    download_flag = False
-    for file in [CIFAR_10_GZ_FILE] + CIFAR10_TRAIN_DATASETS + CIFAR10_TEST_DATASETS:
-        if file not in os.listdir(input_folder):
-            download_flag = True
-            
-    if download_flag:
-        print("Downloading CIFAR10 dataset")
-        response = urlopen(SOURCE_URL_CIFAR10)
-        with open(input_folder + CIFAR_10_GZ_FILE, 'wb') as outfile:
-            outfile.write(response.read())
-        print('Succesfully downloaded and unzipped', CIFAR_10_GZ_FILE)
-        print("extracting files...")
-        tar = tarfile.open(input_folder + CIFAR_10_GZ_FILE)
-        tar.extractall(input_folder)
-        files = os.listdir(input_folder + CIFAR_10_FOLDER)
-        for file in files:
-            move(input_folder + CIFAR_10_FOLDER + file, input_folder + file)
-        os.rmdir(input_folder + CIFAR_10_FOLDER)
-    print("Loading Cifar-10 dataset")
-    with open(input_folder + CIFAR10_TEST_DATASETS[0], 'rb') as f0:
-        c10_test_dict = pickle.load(f0, encoding='bytes')
-
-    c10_test_dataset, c10_test_labels = c10_test_dict[b'data'], c10_test_dict[b'labels']
-    
-    c10_train_dataset, c10_train_labels = [], []
-    for train_dataset in CIFAR10_TRAIN_DATASETS:
-        with open(input_folder + train_dataset, 'rb') as f0:
-            c10_train_dict = pickle.load(f0, encoding='bytes')
-            c10_train_dataset_, c10_train_labels_ = c10_train_dict[b'data'], c10_train_dict[b'labels']
-            
-            c10_train_dataset.append(c10_train_dataset_)
-            c10_train_labels += c10_train_labels_
-
-    c10_train_dataset = np.concatenate(c10_train_dataset, axis=0)
-    test_dataset, test_labels = reformat_data(c10_test_dataset, c10_test_labels, image_width, image_height, image_depth)
-    train_dataset, train_labels = reformat_data(c10_train_dataset, c10_train_labels, image_width, image_height, image_depth)
-    print("The CIFAR-10 training dataset contains {} images, each of size {}".format(train_dataset[:,:,:,:].shape[0], train_dataset[:,:,:,:].shape[1:]))
-    print("The CIFAR-10 test dataset contains {} images, each of size {}".format(test_dataset[:,:,:,:].shape[0], test_dataset[:,:,:,:].shape[1:]))
-    print("There are {} number of labels.".format(len(np.unique(c10_train_labels))))
-    return train_dataset, train_labels, test_dataset, test_labels