Stat tests #5

Open · wants to merge 6 commits into master
122 changes: 122 additions & 0 deletions stat_tests/stat_test.py
@@ -0,0 +1,122 @@
import pandas as pd
import argparse
import seaborn as sns
import stat_test_utils as stu
import os

defaultbaseline = "../test_data/V2_500Agents_125StepsSeed1.csv"
defaultpovertythreshold = 1.0

def parse_args():
    """
    The tests can be run using
    python stat_test.py --simulation <path> [--baseline <path>] [--povertythreshold <value>]
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("--simulation", "-s", help="path to simulation output to be evaluated for similarity", type=str, required=True)
    parser.add_argument("--baseline", "-b", help="Optional. Path to simulation output to be used as baseline", type=str, default=defaultbaseline)
    parser.add_argument("--povertythreshold", "-pt", help="Value for poverty threshold used", type=float, default=defaultpovertythreshold)
Contributor:

The help text is not very descriptive about what the poverty threshold is. Is this a well-known concept for the users/academic peers?

Member Author:

You're right. @vmgaribay shall we elaborate on this? I recall that the way you suggested is some percentage (~10%) of the mean income. The problem with setting it that way for this test is that a change in the implementation might shift the income distribution. To (also) be sensitive to this, the idea was to use a fixed value (TBD) defined on the baseline simulation in a separate step. To make it possible to change this without hacking the code I added this CL argument, but I agree it is not very descriptive. Ideas, suggestions?

Contributor:

Yes, the threshold of 1 was just serving as a static placeholder; originally, it was to be the bottom 10th percentile, but we were trying to reduce variations. I have also seen a percentage of the median population income used.
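For reference, a minimal sketch of how a fixed threshold could be derived once from the baseline run, assuming the bottom-10th-percentile variant mentioned above (the helper name and percentile choice are illustrative, not part of this PR):

import pandas as pd

def derive_poverty_threshold(baseline_csv, percentile=10):
    # Hypothetical helper: take the final-step capital (k_t) distribution
    # of the baseline and return the bottom nth-percentile value.
    base = pd.read_csv(baseline_csv)
    final_step = base[base["Step"] == base["Step"].max()]
    return final_step["k_t"].quantile(percentile / 100)

Freezing the resulting number in a separate step, rather than recomputing it per run, keeps the test sensitive to implementation changes that shift the income distribution, as discussed above.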

Contributor:

Would it not be a good idea to set the poverty threshold either very high or very low, so that everyone is above or below it, for testing purposes? This should ensure that any randomness introduced in the simulation doesn't throw the tests out of whack as well.

Contributor:

@cpranav93 I don't think I follow

Contributor:

@vmgaribay, let's say we set poverty_threshold to 0; then every agent would always be above the poverty line, or vice versa if we set it to a very high value. This way we can deterministically ensure that the poverty-line calculation is tested and working...

but as I say this, I realise that this may not be the point of these tests (since they are not unit tests meant to check each and every functionality!). So feel free to follow along on my crazy journey, or just deboard, tuck and roll off!

Contributor:

Ah, I see where you were going with it. Yes, this is important for testing the test but not the test itself.
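As a sketch of that "testing the test" idea: an extreme threshold gives a deterministic expectation one could assert against (this assumes the column names used in this PR and that k_t is non-negative):

import pandas as pd

base = pd.read_csv("../test_data/V2_500Agents_125StepsSeed1.csv")

# With a threshold of 0, no agent should ever be flagged as in poverty;
# with an arbitrarily large one, every agent should be.
assert not (base["k_t"] < 0).any()
assert (base["k_t"] < 1e12).all()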


    args = parser.parse_args()
    return args


def load_sim_file(path, **kwargs):
    # Load a simulation CSV, forwarding any keyword arguments to read_csv.
    data = []
    if os.path.isfile(path):
        try:
            data = pd.read_csv(path, **kwargs)
        except Exception:
            print(f"failed to read {path}")
    else:
        print(f"{path} is not a file")
    return data


def print_stat_test_output(pvalues, properties):
    if len(pvalues) != len(properties):
        print('mismatch in properties and p values')
    else:
        for pv, prop in zip(pvalues, properties):
            if pv > 0.05:
Contributor:

Should this p-value be a non-required parameter of the program?

Contributor:

I assume this value is taken from the scipy library example? Or is there a relation to our specific problem?

Member Author:

We could add a p-value argument. p = 0.05 is the standard for ~2-sigma significance on a test. No specific example, just probability theory; it's vanilla hypothesis testing.
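A minimal sketch of what such an argument could look like, following the style of the existing ones (the flag name is a suggestion, not part of this PR):

parser.add_argument("--pvalue", "-p", help="Significance level below which the null hypothesis of same parent distribution is rejected", type=float, default=0.05)

print_stat_test_output would then take this threshold as a parameter instead of the hard-coded 0.05.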

print(f"Null hypothesis of same parent distribution accepted for {prop} at p = {pv} \n")
else:
print(f"Null hypothesis of same parent distribution rejected for {prop} at p = {pv} \n")
Contributor:

Should there be a global acceptance output as well? As in, the complete test is only accepted when all sub-tests are accepted.

Member Author:

I guess so. One point to consider here (also pinging @vmgaribay) is the possibility of false negatives and/or different tests being weighted towards different parts of the distribution (center vs. tail), which may impact sensitivity.

Also: this test is solely for the purpose of testing outcomes while refactoring the model with no other changes. As soon as one changes inputs and/or algorithmic principles, differences may occur.

Easy to do an aggregation step now, so I'll implement it.
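One way the aggregation step could look (a sketch only; the actual implementation may differ):

def aggregate_results(pvalues, alpha=0.05):
    # Overall pass only if no sub-test rejects the null hypothesis.
    passed = all(pv > alpha for pv in pvalues)
    print(f"Overall result: {'PASS' if passed else 'FAIL'}")
    return passed

Returning a process exit code from this (e.g. sys.exit(0 if passed else 1)) would also cover the "Tie into CI" TODO below.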



def perform_stat_test(simulation_file, baseline_file, povertythreshold):

    print('loading data ...\n')
    sim_data = load_sim_file(simulation_file)
    base_data = load_sim_file(baseline_file)

    """
    calculate derived properties
    (i) poverty
    """

    print("calculating derived properties ...\n")
    sim_data["InPoverty"] = sim_data["k_t"] < povertythreshold
    base_data["InPoverty"] = base_data["k_t"] < povertythreshold

    sim_total_steps_in_poverty = sim_data.groupby("AgentID")[["InPoverty"]].sum()
    sim_max_consec_steps = sim_data.groupby("AgentID").apply(stu.max_consec)
Contributor:

Does the groupby command pass the income values for a single agent to max_consec and iteratively work through all the agents?

I ask because in max_consec, values is a 1D variable for "in_poverty" key.

Member Author:

The groupby command instantiates a series of views on the DataFrame object, one for each value of the parameter being grouped by, and passes these on individually. For the total steps, the code then sums only the "InPoverty" field for each grouped view (i.e. agent) and returns only the summed "InPoverty" column.

For max_consec, the input to the function is this series of grouped views, one by one. The code then sets Step, instead of AgentID, as the index value, enabling a loop over all the steps an agent has performed (the iloc command executes on the index). Because this is a single element (when selecting the "InPoverty" column), the boolean evaluation works.

Does this answer your question @cpranav93 ?
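A tiny self-contained example of the groupby/apply behaviour described above (toy data, not simulation output):

import pandas as pd

df = pd.DataFrame({
    "AgentID":   [1, 1, 2, 2],
    "Step":      [0, 1, 0, 1],
    "InPoverty": [True, True, False, True],
})

# apply() receives one sub-DataFrame per AgentID, holding all of that
# agent's rows; each returned Series becomes a row of the result,
# indexed by AgentID.
out = df.groupby("AgentID").apply(lambda g: pd.Series({"Total": g["InPoverty"].sum()}))
print(out)
#          Total
# AgentID
# 1            2
# 2            1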


    base_total_steps_in_poverty = base_data.groupby("AgentID")[["InPoverty"]].sum()
    base_max_consec_steps = base_data.groupby("AgentID").apply(stu.max_consec)

    """
    (ii) Technology regime switches
    """

    sim_switches = sim_data.groupby("AgentID").apply(stu.tally_switches)

    base_switches = base_data.groupby("AgentID").apply(stu.tally_switches)

    """
    join derived properties for further analysis
    """

    sim_derived = sim_total_steps_in_poverty.join(sim_max_consec_steps, on="AgentID").join(sim_switches, on="AgentID")
    base_derived = base_total_steps_in_poverty.join(base_max_consec_steps, on="AgentID").join(base_switches, on="AgentID")

    """
    Perform two sample statistical distribution tests on final distributions of simulated properties
    """
    print("performing Cramer von Mises two sample tests ...\n")
    # Cramer von Mises
    cvm_kt_p, cvm_kt_s = stu.CramerVonMises(sim_data, base_data, 'k_t', collateSteps=True)

    cvm_total_poverty_p, cvm_total_poverty_s = stu.CramerVonMises(sim_derived, base_derived, 'InPoverty')
    cvm_consec_poverty_p, cvm_consec_poverty_s = stu.CramerVonMises(sim_derived, base_derived, 'MaxConsec')

    cvm_LtoH_p, cvm_LtoH_statistic = stu.CramerVonMises(sim_derived, base_derived, 'LtoH')
    cvm_HtoL_p, cvm_HtoL_statistic = stu.CramerVonMises(sim_derived, base_derived, 'HtoL')

    """
    Print results.

    TODO: Tie into CI
    """
    print_stat_test_output([cvm_kt_p, cvm_total_poverty_p, cvm_consec_poverty_p, cvm_LtoH_p, cvm_HtoL_p],
                           ['k_t', 'total steps in poverty', 'max consecutive steps in poverty',
                            'switches from L to H technology', 'switches from H to L technology'])


def main():
    args = parse_args()

    simulation_file = args.simulation
    baseline_file = args.baseline
    povertythreshold = args.povertythreshold

    perform_stat_test(simulation_file, baseline_file, povertythreshold)


if __name__ == "__main__":
    main()


65 changes: 65 additions & 0 deletions stat_tests/stat_test_utils.py
@@ -0,0 +1,65 @@
# From agent data, calculates the maximum consecutive number and the total number of
# time steps spent below a set threshold for each agent, evaluated at t_final

import pandas as pd
import scipy.stats as st


def max_consec(values):
    # Compare the poverty bool of each step and the previous step; if both are true, raise the consecutive-day tally.
    # If the running tally is greater than the current maximum, set a new maximum.
    # If there were no consecutive days, return 0; otherwise return the maximum (plus one day).

    values = values.set_index("Step")
    tally, maximum = 0, 0
    for i in range(1, len(values)):
        if values.iloc[i]["InPoverty"] and values.iloc[i-1]["InPoverty"]:
            tally += 1
        else:
            if tally > maximum:
                maximum = tally
            tally = 0
    # In case someone was in poverty until the end, do a final check for the maximum (niche case)
    if tally > maximum:
        maximum = tally
    if maximum > 0:
        return pd.Series({"MaxConsec": maximum + 1})
    return pd.Series({"MaxConsec": 0})



def tally_switches(values):
    # Compare the technology of each step and the previous step; if equal, pass to the next step.
    # If unequal, add 1 to the Low-to-High tally if the previous value was "L", or
    # 1 to the High-to-Low tally if the previous value was not "L". Return the tallies.
    values = values.set_index("Step")
    LtoHtally, HtoLtally = 0, 0
    for i in range(1, len(values)):
        if values.iloc[i]["technology"] == values.iloc[i-1]["technology"]:
            pass
        elif values.iloc[i-1]["technology"] == "L":
            LtoHtally += 1
        else:
            HtoLtally += 1

    return pd.Series({"LtoH": LtoHtally, "HtoL": HtoLtally})



def CramerVonMises(sim, base, prop, collateSteps=False):
    if collateSteps:
        # Compare only the final-step distributions of each run
        maxStepSim = max(sim["Step"])
        maxStepBase = max(base["Step"])
        simProp = sim[sim['Step'] == maxStepSim][prop]
        baseProp = base[base['Step'] == maxStepBase][prop]
    else:
        simProp = sim[prop]
        baseProp = base[prop]

    cvm = st.cramervonmises_2samp(simProp, baseProp)
    return cvm.pvalue, cvm.statistic




