forked from NTU-CompHydroMet-Lab/pyBL
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathRunall_main.py
226 lines (179 loc) · 7.95 KB
/
Runall_main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
# %%
# Cell 1: compute the observed statistical properties of the rainfall
# record and their fitting weights, at the configured time scales
# (e.g. 5T 1H 6H 1D, or 1H 3H 6H 1D).
# NOTE: the input CSV's index column is literally spelled 'datatime'
# (sic, not 'datetime') -- do not "fix" the spelling here.
import dotmap
import pandas as pd
import yaml
from datetime import datetime as dt
from Library.Cal_stats_lib.utils.utils import *
from Library.Cal_stats_lib.utils.stats_calculation import *

# Load the run configuration; use a context manager so the file handle
# is closed instead of leaked.
with open('./config/default.yaml') as cfg:
    args = dotmap.DotMap(yaml.load(cfg, Loader=yaml.FullLoader))

# step 1: read the raw rainfall time series.
rawData = pd.read_csv('./01 Input_data/test.csv', index_col='datatime')
rawData.index = pd.to_datetime(rawData.index)
pDryThreshold = 0.5  # probability-dry threshold forwarded to the stats routines

# step 1.5: validate the input -- exactly one data column is required.
if len(rawData.columns.values) != 1:
    # Previously this only printed a warning and carried on with a
    # multi-column DataFrame, which breaks the series-style filtering
    # below. Fail fast with a clear error instead.
    raise ValueError('the raw data have more than one column!! Check again!!')
rawData = rawData[rawData.columns.values[0]]
# Drop every negative value (invalid rainfall depths).
rawData = rawData.drop(rawData.index[rawData < 0.0])

# step 2: which statistical properties to compute.
prop = pd.read_csv(args.IO.config_path, index_col=0, header=None)
propertyList = prop.loc['prop'].dropna().to_numpy()
print('The used properties: {}'.format(propertyList))

# step 3: the time scales -- general, mean-specific, and the output
# scale chosen for the mean.
timeScaleList = prop.loc['time'].dropna().to_numpy()
meanTimeScale = prop.loc['timeForMean'].dropna().to_numpy()
outputMean = prop.loc['OutputTimeScaleForMean'].dropna().to_numpy()
print('The time scales: {}'.format(timeScaleList))
print('The time scale of Mean: {}'.format(meanTimeScale))
print(f'Output time scale for Mean: {outputMean}')
if outputMean[0] not in meanTimeScale.tolist():
    print('-'*50)
    print(f'Mean will not be calculated because {outputMean} is not in timeForMean, please choose the scale from timeForMean.')

# steps 4-6: calculate and write out the statistics and their weights.
createStatisticalFile(rawData, propertyList, timeScaleList, meanTimeScale,
                      outputMean, args.IO.stats_file_path,
                      args.IO.weight_file_path, pDryThreshold)
# %%
# %%
# Cell 2: calibrate the BLRPRx model -- fit one theta vector per row
# (calendar month) of the statistics file. Takes roughly 2:40 per month.
import pandas as pd
import argparse
import numpy as np
from Library.Cal_stats_lib.utils.utils import *
import dotmap
from datetime import datetime as dt
from Library.BLRPRmodel.BLRPRx import *
from Library.Fitting_lib import fitting, objectiveFunction
from datetime import datetime

# Load the run configuration (closed handle instead of a leaked one).
with open('./config/default.yaml') as cfg:
    args = dotmap.DotMap(yaml.load(cfg, Loader=yaml.FullLoader))

# step 0: read statistical properties and corresponding weights.
num_month = pd.read_csv(args.IO.stats_file_path, index_col=0, header=0).shape[0]
TSF = pd.read_csv(args.IO.config_path, index_col=0, header=None)
timeScale = TSF.loc['time'].dropna().tolist()
timeScaleList = fitting.scalesTransform(timeScale)
print('There are {} rows to fit'.format(num_month))

file = pd.read_csv('./02 Output_data/result.csv')
result_csv = []   # fitted thetas, one entry per month
sav = {}          # model-implied statistics, keyed by month

# for each calendar month
for month in range(1, num_month+1):
    print(f'====================== Month : {month} =======================')
    # step 1: initial theta (9 slots; two of them are spare/fixed).
    theta = [None] * 9
    if args.fitting.theta_initial is None:
        theta[0] = 0.01  # lambda #
        theta[1] = 0.1   # iota #
        theta[2] = 1.0   # sigma/mu
        theta[3] = 1e-6  # spare space
        theta[4] = 2.1   # alpha #
        theta[5] = 7.1   # alpha/nu
        theta[6] = 0.1   # kappa #
        theta[7] = 0.1   # phi #
        theta[8] = 1.0   # c
    else:
        theta = args.fitting.theta_initial
    # step 2: initialize the objective function.
    obj = objectiveFunction.Exponential_func
    # step 3: initialize the fitting model. Fail fast on an unsupported
    # moment setting instead of hitting a NameError further down when
    # fitting_model would be undefined.
    if args.fitting.moment == 'BLRPRx':
        fitting_model = BLRPRx(args.fitting.intensity_mode)
    else:
        raise ValueError(f'Unsupported fitting moment: {args.fitting.moment}')
    # step 4: find an approximate global optimum with Dual Annealing.
    past = datetime.now()
    final, score = fitting.Annealing(
        theta, obj, fitting_model, month, timeScaleList,
        args.IO.stats_file_path, args.IO.weight_file_path, args.IO.config_path)
    print('Time cost == {}, Row {} theta after Annealing == {}'.format(
        (datetime.now() - past), month, ["%.9f" % elem for elem in final]))
    if score >= 1:
        # Annealing did not converge well enough -- retry with Basinhopping.
        past = datetime.now()
        final, score = fitting.Basinhopping(
            theta, obj, fitting_model, month, timeScaleList,
            args.IO.stats_file_path, args.IO.weight_file_path, args.IO.config_path)
        print('Time cost == {}, Row {} theta after Basinhopping == {}'.format(
            (datetime.now() - past), month, ["%.9f" % elem for elem in final]))
    # step 5: polish with Nelder-Mead; keep whichever score is better.
    past = datetime.now()
    local_final, local_score = fitting.Nelder_Mead(
        final, obj, fitting_model, month, timeScaleList,
        args.IO.stats_file_path, args.IO.weight_file_path, args.IO.config_path)
    print('Time cost == {}, Row {} theta after Nelder_Mead == {}'.format(
        (datetime.now() - past), month, ["%.9f" % elem for elem in local_final]))
    final = local_final if local_score < score else final
    final_stats = objectiveFunction.Cal(final, timeScaleList, fitting_model)
    sav[month] = final_stats
    print(f'Final Stats : {final_stats}')
    # Minor modifications: drop the two unused slots (sigma/mu and the
    # spare) and pin c to 1.0 before saving.
    final = np.delete(final, [2, 3])
    final[-1] = 1.0
    result_csv.append(final)
    file.loc[month] = final

# Build the column labels for the model-implied statistics table once,
# after the loop (they do not depend on the month).
prop = pd.read_csv(args.IO.config_path, index_col=0, header=None)
propertyList = prop.loc['prop'].dropna().to_numpy().tolist()
cols = []
for sinscale in timeScale:
    for sinStaprop in propertyList[:-1]:
        cols.append(str(sinStaprop) + '_' + str(sinscale))
cols.insert(0,'Mean_1h')

# step 6: output the result thetas and the model-implied statistics.
file.to_csv('./02 Output_data/Theta.csv', index=False)
df = pd.DataFrame(sav).T
df.columns = cols
df.to_csv('Mfinal_staProp.csv')
print('Calibration finished')
# %%
# %%
# Cell 3: sample synthetic storms from the fitted thetas and compute
# the statistical properties of the sampled series.
# (resample will compensate all empty timestamps;
#  1h 3h 6h 24h, or sample at 5T)
from dotmap import DotMap
from Library.BLRPRmodel.BLRPRx import *
from calendar import month_abbr
from datetime import timedelta as td
from datetime import datetime as dt
from datetime import datetime
from Library.Cal_stats_lib.utils.utils import *
from Library.Cal_stats_lib.utils.stats_calculation import *
import numpy as np
import pandas as pd
import os
from Library.Sampling_lib.mergeCells import *
from Library.Sampling_lib.sampling import *
from Library.Sampling_lib.compare import *
from Library.Fitting_lib import fitting, objectiveFunction
import warnings, yaml

# Load the run configuration (closed handle instead of a leaked one).
with open('./config/default.yaml') as cfg:
    args = DotMap(yaml.load(cfg, Loader=yaml.FullLoader))
args.sampling.start_time = dt.strptime(args.sampling.start_time, '%Y-%m-%d')
args.sampling.end_time = dt.strptime(args.sampling.end_time, '%Y-%m-%d')

total_sample_sta_prop = []  # one stats table per simulated month
num_month = pd.read_csv(args.IO.stats_file_path, index_col=0, header=0).shape[0]
pDryThreshold = 0.5
timeseries = []

# Read the fitted thetas once, outside the loop (the file does not
# change between iterations).
thetaTable = pd.read_csv('./02 Output_data/Theta.csv')

for month in range(1, num_month+1):
    start_time, end_time, freq = args.sampling.start_time, args.sampling.end_time, args.sampling.freq
    duration_sim_hr = (end_time - start_time).total_seconds() / 3600
    # step 0: result thetas (lambda, iota, alpha, nu, kappa, phi) for this row
    thetas = thetaTable.loc[month-1]
    print('='*50)
    print(f'Mon : {str(month)}/{str(num_month)}')
    # step 1: sample storms and merge their cells into a rainfall series
    alpha, nu, phi, storms = SampleStorm(thetas, start_time, duration_sim_hr)
    rainfall_ts = MergeCells(storms, freq)
    # step 2: statistical properties of the sampled time series
    sample_sta_prop = sample_Sta_prop(rainfall_ts, args.IO.config_path, pDryThreshold)
    total_sample_sta_prop.append(sample_sta_prop)
    print("----------Finish row {} sampling!----------\n\n".format(month))
    timeseries.append(rainfall_ts)

# step 3: output the sampled statistics. Concatenate all months in one
# call: the old pairwise loop was O(n^2) and, when there was only a
# single month, never assigned fin_df at all, so the to_csv below
# raised a NameError.
fin_df = pd.concat([pd.DataFrame(p) for p in total_sample_sta_prop])
print('Sampling Finished')
fin_df.to_csv(args.IO.sample_stats_path)
print('All tasks are done')
# %%