add feature extraction & comments & traditional methods
shinshiner committed Jun 14, 2018
1 parent 8f55b70 commit abf130a
Showing 13 changed files with 129 additions and 36 deletions.
4 changes: 2 additions & 2 deletions config/mlp.json → config/dnn.json
@@ -1,5 +1,5 @@
{
"model_name": "mlp",
"model_name": "dnn",
"flatten": true,
"seed": 233,
"epochs": 100000,
@@ -14,5 +14,5 @@
"lr_decay_epoch": [150, 225, 300],
"lr_decay_rate": 0.1,
"criterion": "CrossEntropyLoss",
"trainer": "mlp_trainer"
"trainer": "dnn_trainer"
}
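
For context, main.py resolves the renamed "trainer" value at runtime with locate (presumably pydoc.locate, given the bare call), so the string must name a module under trainers/. A minimal sketch of that lookup:

from pydoc import locate

# "dnn_trainer" from the config becomes a dotted path into the trainers package
train_fn = locate("trainers.%s.train" % "dnn_trainer")  # -> trainers/dnn_trainer.py:train
assert train_fn is not None  # locate returns None if the module or attribute is missing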
Binary file removed data/t10k-images-idx3-ubyte.gz
Binary file removed data/t10k-labels-idx1-ubyte.gz
Binary file removed data/train-images-idx3-ubyte.gz
Binary file removed data/train-labels-idx1-ubyte.gz
80 changes: 48 additions & 32 deletions main.py
@@ -9,7 +9,9 @@
from utils import init_dir, show_config, setup_logger

parser = argparse.ArgumentParser(description="MNIST classifiers")
parser.add_argument("--method", type=str, default="mlp")
parser.add_argument("--traditional-methods", type=bool, default=False)
parser.add_argument("--method", type=str, default="dnn")
parser.add_argument("--feature-extracting-method", type=str, default=None)

parser.add_argument("--resume", action="store_true")
parser.add_argument("--test", action="store_true")
@@ -23,35 +25,49 @@

if __name__ == "__main__":
    args = parser.parse_args()
    with open(os.path.join(args.config_dir, "%s.json" % args.method)) as f:
        config = json.load(f)
    for arg in vars(args):
        if arg not in config.keys():
            config[arg] = getattr(args, arg)
    show_config(config)

    init_dir(args.model_dir)
    init_dir(args.log_dir)
    np.random.seed(config["seed"])
    torch.manual_seed(config["seed"])
    torch.set_default_tensor_type('torch.FloatTensor')

    data = MnistLoader(flatten=config["flatten"], data_path=args.data_dir)
    model = locate("models.%s.%s" % (args.method, config["model_name"]))()
    if args.resume or args.test:
        model_path = os.path.join(config["model_dir"], "%s_model.pth" % config["method"])
        if os.path.exists(model_path):
            print("Loading latest model from %s" % model_path)
            model.load_state_dict(torch.load(model_path))
    if args.cuda and torch.cuda.is_available():
        model = model.cuda()
    model.train()
    optimizer = locate("torch.optim.%s" % config["optimizer_type"])(model.parameters(), **config["optimizer"])
    logger = setup_logger(args.method, os.path.join(args.log_dir, "%s.log" % args.method), resume=args.resume)

    if args.test:
        f = locate("trainers.%s.test" % config["trainer"])
    else:
        f = locate("trainers.%s.train" % config["trainer"])
    f(data, model, optimizer, logger, config)

    if args.traditional_methods: # apply traditional classification methods on MNIST
        os.system('python3 traditional_methods/%s.py' % args.method)  # directory name and .py extension must match the files added below
    else: # using DNNs or CNNs
        with open(os.path.join(args.config_dir, "%s.json" % args.method)) as f:
            config = json.load(f)
        for arg in vars(args):
            if arg not in config.keys():
                config[arg] = getattr(args, arg)
        show_config(config)

        # initialization
        init_dir(args.model_dir)
        init_dir(args.log_dir)
        np.random.seed(config["seed"])
        torch.manual_seed(config["seed"])
        torch.set_default_tensor_type('torch.FloatTensor')

        # load data
        data = MnistLoader(flatten=config["flatten"], data_path=args.data_dir)

        # apply feature extraction on data; locate() only resolves the function,
        # so it has to be called on each split. The target dimension is assumed
        # to come from the config ("feature_dim" is a hypothetical key, not
        # present in this commit), and each split is fit independently, matching
        # the one-shot design of the helpers in utils.py.
        if args.feature_extracting_method is not None:
            extract = locate("utils.%s" % args.feature_extracting_method)
            data.data_train = extract(data.data_train, config["feature_dim"])
            data.data_test = extract(data.data_test, config["feature_dim"])

        model = locate("models.%s.%s" % (args.method, config["model_name"]))()
        if args.resume or args.test:
            model_path = os.path.join(config["model_dir"], "%s_model.pth" % config["method"])
            if os.path.exists(model_path):
                print("Loading latest model from %s" % model_path)
                model.load_state_dict(torch.load(model_path))
        if args.cuda and torch.cuda.is_available():
            model = model.cuda()
        model.train()
        optimizer = locate("torch.optim.%s" % config["optimizer_type"])(model.parameters(), **config["optimizer"])
        logger = setup_logger(args.method, os.path.join(args.log_dir, "%s.log" % args.method), resume=args.resume)

        if args.test:
            f = locate("trainers.%s.test" % config["trainer"])
        else:
            f = locate("trainers.%s.train" % config["trainer"])

        # start training or testing the model
        f(data, model, optimizer, logger, config)
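
For reference, hypothetical invocations of the new flags (method and function names taken from the files in this commit; config/dnn.json supplies the remaining settings):

# neural-network path, with PCA-based feature extraction applied to the data
python3 main.py --method dnn --feature-extracting-method skPCA

# traditional path: runs traditional_methods/NaiveBayes.py as a subprocess
python3 main.py --traditional-methods --method NaiveBayes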

4 changes: 2 additions & 2 deletions models/mlp.py → models/dnn.py
@@ -7,9 +7,9 @@
from torch.autograd import Variable
from utils import weights_init

class mlp(nn.Module):
class dnn(nn.Module):
    def __init__(self, in_features=2025, classes=10, activation='relu'):
        super(mlp, self).__init__()
        super(dnn, self).__init__()
        self.activation = activation

        self.fc1 = nn.Linear(in_features, 500)
20 changes: 20 additions & 0 deletions traditional_methods/NaiveBayes.py
@@ -0,0 +1,20 @@
from sklearn.naive_bayes import GaussianNB

import sys
sys.path.append('/home/shin/mlp')  # hard-coded absolute project root so data_loader is importable

from data_loader import MnistLoader

def NB():
    loader = MnistLoader(flatten=True, data_path='../data', var_per=None)
    model = GaussianNB()

    model.fit(loader.data_train, loader.label_train)
    print('model trained')
    res = model.score(loader.data_test, loader.label_test)
    print(res)

    return res

if __name__ == '__main__':
    NB()
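
Note that the relative data_path means the script is expected to be run from inside traditional_methods/:

cd traditional_methods && python3 NaiveBayes.py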
20 changes: 20 additions & 0 deletions traditional_methods/SGD.py
@@ -0,0 +1,20 @@
from sklearn.linear_model import SGDClassifier

import sys
sys.path.append('/home/shin/mlp')

from data_loader import MnistLoader

def SGD():
    loader = MnistLoader(flatten=True, data_path='../data', var_per=None)
    model = SGDClassifier(max_iter=30000)

    model.fit(loader.data_train, loader.label_train)
    print('model trained')
    res = model.score(loader.data_test, loader.label_test)
    print(res)

    return res

if __name__ == '__main__':
    SGD()
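
A side note on this script: SGDClassifier is sensitive to feature scale, so a variant that rescales raw pixel values before fitting would likely converge better. A hypothetical sketch (standard scikit-learn API; not part of this commit):

from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler().fit(loader.data_train)  # fit the scaler on training data only
model.fit(scaler.transform(loader.data_train), loader.label_train)
res = model.score(scaler.transform(loader.data_test), loader.label_test)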
Empty file removed traditional_methods/run.py
Empty file removed traditional_methods/utils.py
7 changes: 7 additions & 0 deletions trainers/mlp_trainer.py → trainers/dnn_trainer.py
@@ -14,20 +14,27 @@ def train(data, model, optimizer, logger, config):
    criterion = locate("torch.nn.%s" % config["criterion"])()
    if torch.cuda.is_available():
        criterion = criterion.cuda()

    # start training
    for epoch in range(config["last_epoch"] + 1, config["epochs"] + 1):
        # sample one batch of training data
        batch_indices = np.random.choice(data.DATA_SIZE[0], size=config["batch_size"], replace=False)
        inputs = Variable(torch.from_numpy(data.data_train[batch_indices, :]), requires_grad=False)
        targets = Variable(torch.from_numpy(data.label_train[batch_indices]), requires_grad=False)
        if torch.cuda.is_available():
            inputs = inputs.cuda()
            targets = targets.cuda()

        # forward pass & loss computation
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # backward pass & parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # evaluate the performance of the current model
        batches = (data.DATA_SIZE[1] + config["batch_size"] - 1) // config["batch_size"]
        prediction = np.zeros(data.DATA_SIZE[1], dtype=np.uint8)
        for param in model.parameters():
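
The evaluation half of the loop is cut off in this view (the author's code continues with the model.parameters() loop above). A minimal sketch of what batched test-set prediction under these variable names could look like, purely hypothetical:

# predict the test set in batches, filling the preallocated prediction array
for i in range(batches):
    lo = i * config["batch_size"]
    hi = min(lo + config["batch_size"], data.DATA_SIZE[1])
    inputs = Variable(torch.from_numpy(data.data_test[lo:hi, :]), requires_grad=False)
    if torch.cuda.is_available():
        inputs = inputs.cuda()
    prediction[lo:hi] = torch.max(model(inputs), 1)[1].cpu().data.numpy()
accuracy = float(np.sum(prediction == data.label_test)) / data.DATA_SIZE[1]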
30 changes: 30 additions & 0 deletions utils.py
@@ -7,6 +7,9 @@
from torch.autograd import Variable
import logging

from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import FactorAnalysis, FastICA, PCA, NMF, LatentDirichletAllocation

def init_dir(dir):
    if not os.path.isdir(dir):
        os.mkdir(dir)
@@ -50,5 +53,32 @@ def show_config(config):
        print(' %s: %s' % (key, str(config[key])))
    print('========================================')

# Feature Extraction
def FA(data, dim):
    fa = FactorAnalysis(n_components=dim)
    fa.fit(data)
    return fa.transform(data)

def ICA(data, dim):
    ica = FastICA(n_components=dim)
    ica.fit(data)
    return ica.transform(data)

def skPCA(data, dim):
    model = PCA(n_components=dim)
    model.fit(data)
    return model.transform(data)

def skNMF(data, dim):
    model = NMF(n_components=dim)
    model.fit(data)
    return model.transform(data)

# Max-min normalization
def max_min(data):
    model = MinMaxScaler()
    model.fit(data)
    return model.transform(data)

if __name__ == "__main__":
    print(latest_model("trained_models", "drop_connect"))
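
For illustration, a hypothetical use of the new helpers on MNIST-shaped data (the 64-dimensional target is an arbitrary choice):

import numpy as np
from utils import max_min, skPCA

X = np.random.rand(100, 784).astype(np.float32)  # stand-in for flattened MNIST images
X_scaled = max_min(X)            # rescale each feature to [0, 1]
X_reduced = skPCA(X_scaled, 64)  # keep the top 64 principal components
print(X_reduced.shape)           # (100, 64)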
