changes to incorporate local scoring for binomial #71

Open · wants to merge 7 commits into base: gsoc19
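In brief, the patch adds a local-scoring (IRLS-style) update for the Binomial family to the MGWR backfitting loop in mgwr/search.py (working response and weights recomputed each iteration, with a parameter-based convergence check), lets mgwr/gwr.py treat Binomial alongside Poisson when building the hat matrix and predictions, and adds two Monte Carlo scripts under notebooks/ to exercise the change.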
11 changes: 8 additions & 3 deletions mgwr/gwr.py
@@ -1484,7 +1484,8 @@ def _chunk_compute_R(self, chunk_id=0):

for i in range(n):
wi = self._build_wi(i, self.bw_init).reshape(-1, 1)
if isinstance(self.family, Poisson):
if isinstance(self.family, (Poisson, Binomial)):
#if isinstance(self.family, Poisson):
wi=wi.reshape(-1,1)
rslt = iwls(self.y, self.X, self.family, self.offset, None, wi=wi)
inv_xtx_xt = rslt[5]
@@ -1510,7 +1511,9 @@
for i in range(len(chunk_index_Aj)):
index = chunk_index_Aj[i]
wi = self._build_wi(index, self.bws_history[iter_i, j])
if isinstance(self.family, Poisson):

if isinstance(self.family, (Poisson, Binomial)):
#if isinstance(self.family, Poisson):
Xj = Xj.reshape(-1,1)
wi = wi.reshape(-1,1)
rslt = iwls(self.y, Xj, self.family, self.offset, None, wi=wi)
@@ -1559,7 +1562,9 @@ def fit(self, n_chunks=1, pool=None):
predy = self.offset*(np.exp(np.sum(self.X * params, axis=1).reshape(-1, 1)))

elif isinstance(self.family,Binomial):
predy = 1/(1+np.exp(-1*np.sum(self.X * params, axis=1).reshape(-1, 1)))
#XXB = np.multiply(param, X)
#p = np.exp(XXB) / ( 1 + np.exp (XXB))
predy = (np.exp(np.sum(self.X * params, axis=1).reshape(-1, 1)))/(1+np.exp(np.sum(self.X * params, axis=1).reshape(-1, 1)))

else:
predy = np.sum(self.X * params, axis=1).reshape(-1, 1)
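For the Binomial branch of fit, the new predy expression exp(sum(X*params))/(1+exp(sum(X*params))) is algebraically the same inverse logit as the previous 1/(1+exp(-sum(X*params))); only the form changes. A minimal sketch of the equivalence (the NumPy/SciPy names below are for illustration only and are not part of the patch):

    import numpy as np
    from scipy.special import expit

    eta = np.array([-2.0, 0.0, 3.0])            # stand-in for np.sum(X * params, axis=1)
    p_old = 1.0 / (1.0 + np.exp(-eta))          # form used before this change
    p_new = np.exp(eta) / (1.0 + np.exp(eta))   # form used in the patch
    p_ref = expit(eta)                          # numerically stable equivalent
    assert np.allclose(p_old, p_new) and np.allclose(p_new, p_ref)

Note that np.exp(eta) overflows for large positive eta, so expit (or the 1/(1+exp(-eta)) form when eta is positive) is the safer way to evaluate this quantity.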
96 changes: 49 additions & 47 deletions mgwr/search.py
@@ -6,7 +6,6 @@
from copy import deepcopy
from spglm.family import Gaussian, Binomial, Poisson


def golden_section(a, c, delta, function, tol, max_iter, int_score=False,
verbose=False):
"""
@@ -180,20 +179,17 @@ def multi_bw(init, y, X, n, k, family, offset, tol, max_iter, rss_score, gwr_fun
bw_gwr = bw
err = optim_model.resid_response.reshape((-1, 1))
param = optim_model.params
old_param=param
ni = np.multiply(param, X)
p = np.exp(ni) / ( 1 + np.exp (ni))
w = (p)*(1-p)

if isinstance(family, Poisson):
XB = offset*np.exp(np.multiply(param,X))

elif isinstance(family, Binomial):
#v = np.multiply(X, param)
#XB = 1 / (1 + np.exp(-1 * v))
#XB = v + ((1 / (mu * (1 - mu))) * (y - mu))
m = np.multiply(param, X)
XB = 1 / ( 1 + np.exp (-1 * m))
#print("XB "+str(XB.shape))
#mu = 1 / ( 1 + np.exp (-1 * np.multiply (param, X)))
#XB = np.multiply(param, X)
#print(XB.shape)
#XB=np.log(XB/(1-XB))
XB = ni + ((y - p)/(p*(1-p)))

else:
XB = np.multiply(param, X)
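The Binomial branch above seeds the backfitting loop with the local-scoring quantities: the elementwise linear predictor ni = param * X, the fitted probabilities p = exp(ni)/(1+exp(ni)), the IRLS weights w = p(1-p), and the adjusted (working) response XB = ni + (y - p)/(p(1-p)). A minimal standalone sketch of that transformation, with illustrative names only:

    import numpy as np

    def working_response(y, eta):
        # y: 0/1 response, eta: linear predictor for one additive term
        p = np.exp(eta) / (1.0 + np.exp(eta))   # fitted probability
        w = p * (1.0 - p)                        # IRLS / local-scoring weight
        z = eta + (y - p) / w                    # adjusted (working) response
        return z, w

Multiplying temp_X and temp_y by sqrt(w) later in the loop turns this weighted fit into an ordinary Gaussian fit, which is what the temp_X_w / temp_y_w lines feed into the per-covariate bandwidth search.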

@@ -215,28 +211,25 @@ def tqdm(x, desc=''): #otherwise, just passthrough the range

for iters in tqdm(range(1, max_iter + 1), desc='Backfitting'):
new_XB = np.zeros_like(X)
new_p = np.zeros_like(X)
new_ni = np.zeros_like(X)
new_w = np.zeros_like(X)
new_add = np.zeros_like(X)

params = np.zeros_like(X)

for j in range(k):
temp_y = XB[:,j].reshape((-1, 1))
if isinstance(family, (Poisson,Gaussian)):
temp_y = temp_y + err

if isinstance(family, Binomial):
ni_old = np.log((XB[:,j])/(1-XB[:,j]))
temp_y = (ni_old + ((optim_model.y.reshape(-1) - XB[:,j].reshape(-1))/XB[:,j].reshape(-1)*(1-XB[:,j].reshape(-1)))).reshape(-1,1)
print(temp_y.shape)
#temp_y = ((XB[:,j] + (y - mu[:,j]))/(mu[:,j] * ( 1 - mu[:,j] )))
#print(temp_y.shape)
else:
temp_y = XB[:, j].reshape((-1, 1))
#temp_y = (1 / (1+np.exp((-1*(np.multiply(temp_X ,param.reshape(-1,1)))+err))).reshape(-1))

#temp_y = y_new
#temp_y = temp_y + err
#print(min(err))
temp_X = X[:, j].reshape((-1, 1))
temp_w = np.sqrt(w[:,j]).reshape((-1, 1))
temp_X_w = np.multiply(temp_X, temp_w)
temp_y_w = np.multiply(temp_y, temp_w)

if isinstance(family, Binomial):
bw_class = bw_func_g(temp_y, temp_X)
bw_class = bw_func_g(temp_y_w, temp_X_w)
else:
bw_class = bw_func(temp_y, temp_X)

@@ -251,41 +244,50 @@
bw_stable_counter = np.ones(k)

if isinstance(family, Binomial):
optim_model = gwr_func_g(temp_y, temp_X, bw)
optim_model = gwr_func_g(temp_y_w, temp_X_w, bw)

else:
optim_model = gwr_func(temp_y, temp_X, bw)
#err=(temp_y - optim_model.predy).reshape(-1,1)
#print(err.shape)
err = optim_model.resid_response.reshape((-1, 1))
#print(err.shape)

err = optim_model.resid_response.reshape((-1,1))
#Needs further investigation

param = optim_model.params.reshape((-1, ))
#print(param.shape)
#print(temp_X.shape)
#print(err.shape)
#new_XB[:, j] = 1 / (1+np.exp(-1*np.multiply(temp_X ,param)+err)).reshape(-1)
new_XB[:, j] = 1 / (1+np.exp((-1*(np.multiply(temp_X ,param.reshape(-1,1)))))).reshape(-1)
#new_XB[:,j] = optim_model.predy.reshape(-1)
#new_XB[:,j] = np.multiply(param, temp_X).reshape(-1)
#new_XB[:, j] = optim_model.predy.reshape(-1)
#neww_XB[:, j] = (1 / (1+np.exp((-1*(np.multiply(temp_X ,param.reshape(-1,1)))+err))).reshape(-1))
#new_XB[:, j] = np.log(new_XB[:,j]/(1-new_XB[:,j]))
params[:, j] = param

new_ni[:,j] = np.multiply(param.reshape(-1), temp_X_w.reshape(-1))
new_p[:,j] = np.exp(new_ni[:,j])/(1+(np.exp(new_ni[:,j])))

if isinstance(family, (Poisson, Gaussian)):
new_XB[:,j] = optim_model.predy.reshape(-1)
else:
new_XB[:,j] = new_ni[:,j] + ((y.reshape(-1) - new_p[:,j])/(new_p[:,j]*(1-new_p[:,j])))

new_w[:,j] = new_p[:,j]*(1-new_p[:,j])
params[:, j] = param*2
bws[j] = bw

num = np.sum((new_XB - XB)**2) / n
den = np.sum(np.sum(new_XB, axis=1)**2)
score = (num / den)**0.5
if isinstance(family, Binomial):
num = np.sum((params - old_param)**2) / n
den = np.sum(np.sum(params, axis=1)**2)
score = (num / den)**0.5
else:
num = np.sum((new_XB - XB)**2) / n
den = np.sum(np.sum(new_XB, axis=1)**2)
score = (num / den)**0.5

XB = new_XB
#mu = new_mu
#XXB = neww_XB
p = new_p
ni = new_ni
w = new_w
add = new_add
old_param = params

if rss_score:
if isinstance(family, Poisson):
predy = offset*(np.exp(np.sum(X * params, axis=1).reshape(-1, 1)))

elif isinstance(family,Binomial):
predy = 1/(1+np.exp(-1*np.sum(X * params, axis=1).reshape(-1, 1)))
predy = (np.exp(np.sum(X * params, axis=1).reshape(-1, 1)))/(1+np.exp(np.sum(X * params, axis=1).reshape(-1, 1)))

else:
predy = np.sum(np.multiply(params, X), axis=1).reshape((-1, 1))
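One consequence of the change: for the Binomial family the backfitting convergence score is now computed on the local parameter surfaces (params against old_param) rather than on the additive components new_XB against XB used for the other families. A minimal sketch of that criterion, matching the Binomial branch above (names are illustrative):

    import numpy as np

    def soc_params(params_new, params_old, n):
        # relative change in the stacked local coefficient surfaces
        num = np.sum((params_new - params_old) ** 2) / n
        den = np.sum(np.sum(params_new, axis=1) ** 2)
        return (num / den) ** 0.5

This score plays the same role as the existing SOC check against tol; only the quantity being monitored differs for Binomial.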
24 changes: 24 additions & 0 deletions notebooks/Binomial_MC_script_2/main_mc.py
@@ -0,0 +1,24 @@
from multiprocessing import Process, Queue
import multiprocessing as mp


import model_mc
import warnings
import pickle
import sys
warnings.filterwarnings("ignore")


if __name__ == '__main__':
for i in range(int(sys.argv[1]),int(sys.argv[2]), 50):
start = i
end = i + 50

print("Starting iterations for indexes: {} to {}".format(str(start), str(end)))

pool = mp.Pool(processes=50)
result = pool.map(model_mc.models, range(start,end))

print("Completing iterations for indexes: {} to {}".format(str(start), str(end)))
with open("pkls/results-{}-{}.pkl".format(str(start), str(end)), 'wb') as out:
pickle.dump(result, out, pickle.HIGHEST_PROTOCOL)
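As written, this driver takes a start and an end index on the command line, runs the replications in batches of 50 through a multiprocessing pool, and pickles each batch to pkls/results-<start>-<end>.pkl; for example, python main_mc.py 0 500 would produce ten pickle files (assuming a pkls/ directory already exists). The Binomial_MC_script_we variant further below is the same driver with a batch size of 10.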
117 changes: 117 additions & 0 deletions notebooks/Binomial_MC_script_2/model_mc.py
@@ -0,0 +1,117 @@
import sys
sys.path.append("C:/Users/msachde1/Downloads/Research/Development/mgwr")
import pandas as pd
import numpy as np

from mgwr.gwr import GWR
from spglm.family import Gaussian, Binomial, Poisson
from mgwr.gwr import MGWR
from mgwr.sel_bw import Sel_BW

class stats(object):
def __init__(self):
self.gwr_bw = []
self.gwr_aicc = []
self.gwr_bic=[]
self.gwr_aic=[]
self.gwr_params = []
self.gwr_predy = []
self.gwr_rss = []

self.mgwr_bw = []
self.mgwr_aicc = []
self.mgwr_bic=[]
self.mgwr_aic=[]
self.mgwr_params = []
self.mgwr_predy = []
self.mgwr_rss = []

def add(a,b):
return 1+((1/12)*(a+b))

def con(u,v):
return (0*(u)*(v))+0.3

def sp(u,v):
return 1+1/324*(36-(6-u/2)**2)*(36-(6-v/2)**2)

def med(u,v):
B = np.zeros((25,25))
for i in range(25):
for j in range(25):

if u[i][j]<=8:
B[i][j]=0.2
elif u[i][j]>17:
B[i][j]=0.7
else:
B[i][j]=0.5
return B

class foo(object):
def __init__(self):
self.x = []
self.name = ""
self.num = 0

def fkj(name, output):
ff = foo()
ff.x = [1]
ff.name = name
output.put(ff)
return

def models(output):
print("start of the function")
s = stats()
x = np.linspace(0, 25, 25)
y = np.linspace(25, 0, 25)
X, Y = np.meshgrid(x, y)
x1=np.random.normal(0,1,625).reshape(-1,1)
x2=np.random.normal(0,1,625).reshape(-1,1)
#x3=np.random.normal(0,1,625).reshape(-1,1)
error = np.random.normal(0,0.1,625)

B0=con(X,Y).reshape(-1,1)
#B1=add(X,Y).reshape(-1,1)
B2=sp(X,Y).reshape(-1,1)
B3=med(X,Y).reshape(-1,1)

lat=Y.reshape(-1,1)
lon=X.reshape(-1,1)

param = np.hstack([B0,B2,B3])
cons=np.ones_like(x1)
X=np.hstack([cons,x1,x2])
y_raw = ((np.exp(np.sum(X * param, axis=1)+error)/(1+(np.exp(np.sum(X * param, axis=1)+error))))).reshape(-1,1)
#y_raw=(np.exp((np.sum(X * param, axis=1)+error).reshape(-1, 1)))
#y_new = np.random.poisson(y_raw)
y_new = np.random.binomial(1,y_raw,(625,1))

coords = np.array(list(zip(lon,lat)))
y = np.array(y_new).reshape((-1,1))
X1=np.hstack([x1,x2])

bw=Sel_BW(coords,y,X1,family=Binomial())
bw=bw.search()
s.gwr_bw.append(bw)
gwr_model=GWR(coords,y,X1,bw,family=Binomial()).fit()
s.gwr_aicc.append(gwr_model.aicc)
s.gwr_bic.append(gwr_model.bic)
s.gwr_aic.append(gwr_model.aic)
s.gwr_params.append(gwr_model.params)
s.gwr_predy.append(gwr_model.predy)
s.gwr_rss.append(gwr_model.resid_ss)

selector=Sel_BW(coords,y,X1,multi=True,family=Binomial())
selector.search()
s.mgwr_bw.append(selector.bw[0])
mgwr_model=MGWR(coords,y,X1,selector,family=Binomial()).fit()
s.mgwr_aicc.append(mgwr_model.aicc)
s.mgwr_bic.append(mgwr_model.bic)
s.mgwr_aic.append(mgwr_model.aic)
s.mgwr_params.append(mgwr_model.params)
s.mgwr_predy.append(mgwr_model.predy)
s.mgwr_rss.append(mgwr_model.resid_ss)

return s
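Each call to models simulates one replication on a 25 x 25 grid (625 points): a constant surface B0 = 0.3, the smooth sp surface, and the stepped med surface form the coefficient fields, x1 and x2 are standard normal draws, and y is Bernoulli with probability equal to the inverse logit of X*param + error. GWR and MGWR fits with a Binomial family are then recorded in a stats object. A minimal sketch of how the pickled batches written by main_mc.py might be collected afterwards (this helper is not part of the patch, and unpickling the stats instances requires model_mc to be importable):

    import glob
    import pickle

    results = []
    for path in sorted(glob.glob("pkls/results-*.pkl")):
        with open(path, "rb") as fh:
            results.extend(pickle.load(fh))      # each element is a stats instance

    gwr_bws = [s.gwr_bw[0] for s in results]     # GWR bandwidth from each replication
    mgwr_bws = [s.mgwr_bw[0] for s in results]   # MGWR bandwidth record from each replication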
Binary files not shown (20 files).
24 changes: 24 additions & 0 deletions notebooks/Binomial_MC_script_we/main_mc.py
@@ -0,0 +1,24 @@
from multiprocessing import Process, Queue
import multiprocessing as mp


import model_mc
import warnings
import pickle
import sys
warnings.filterwarnings("ignore")


if __name__ == '__main__':
for i in range(int(sys.argv[1]),int(sys.argv[2]), 10):
start = i
end = i + 10

print("Starting iterations for indexes: {} to {}".format(str(start), str(end)))

pool = mp.Pool(processes=10)
result = pool.map(model_mc.models, range(start,end))

print("Completing iterations for indexes: {} to {}".format(str(start), str(end)))
with open("pkls/results-{}-{}.pkl".format(str(start), str(end)), 'wb') as out:
pickle.dump(result, out, pickle.HIGHEST_PROTOCOL)