From 9ab57ac19f8e516b87d9652f4c0eb0d39c1ca93e Mon Sep 17 00:00:00 2001 From: VickyS08 <18540289+VickyS08@users.noreply.github.com> Date: Sat, 3 Feb 2024 16:50:15 -0500 Subject: [PATCH] Adding more data types to process --- DRAW-post-processing/config.py | 108 +++---- DRAW-post-processing/execute_post_process.py | 148 +++++++--- DRAW-post-processing/mysql_indexes.py | 5 +- DRAW-post-processing/phase1_methods.py | 132 +++++++-- DRAW-post-processing/phase3_methods.py | 21 +- .../post_process_ids/id1/id_1_phase_1.py | 10 +- .../post_process_ids/id1/id_1_phase_2.py | 12 +- .../post_process_ids/id1/id_1_phase_3.py | 15 +- .../post_process_ids/id10/id_10_phase_1.py | 43 +++ .../post_process_ids/id10/id_10_phase_3.py | 10 + .../post_process_ids/id11/id_11_phase_1.py | 43 +++ .../post_process_ids/id11/id_11_phase_3.py | 20 ++ .../post_process_ids/id12/id_12_phase_1.py | 41 +++ .../post_process_ids/id12/id_12_phase_3.py | 11 + .../post_process_ids/id13/id_13_phase_1.py | 41 +++ .../post_process_ids/id13/id_13_phase_3.py | 23 ++ .../post_process_ids/id2/id_2_phase_1.py | 69 +++++ .../post_process_ids/id2/id_2_phase_3.py | 20 ++ .../post_process_ids/id3/id_3_phase_1.py | 15 +- .../post_process_ids/id3/id_3_phase_3.py | 15 +- .../post_process_ids/id4/id_4_phase_1.py | 213 ++------------ .../post_process_ids/id4/id_4_phase_3.py | 13 +- .../post_process_ids/id5/id_5_phase_1.py | 68 ++--- .../post_process_ids/id5/id_5_phase_3.py | 23 +- .../post_process_ids/id6/id_6_phase_1.py | 80 ++--- .../post_process_ids/id6/id_6_phase_3.py | 12 +- .../post_process_ids/id7/id_7_phase_1.py | 44 ++- .../post_process_ids/id7/id_7_phase_3.py | 4 +- .../post_process_ids/id8/id_8_phase_1.py | 42 +++ .../post_process_ids/id8/id_8_phase_3.py | 11 + .../post_process_ids/id9/id_9_phase_1.py | 275 ++++++++++++++++++ .../post_process_ids/id9/id_9_phase_3.py | 11 + DRAW-post-processing/sql_commands.py | 2 +- DRAW-post-processing/tables.py | 22 +- 34 files changed, 1151 insertions(+), 471 deletions(-) create mode 100644 DRAW-post-processing/post_process_ids/id10/id_10_phase_1.py create mode 100644 DRAW-post-processing/post_process_ids/id10/id_10_phase_3.py create mode 100644 DRAW-post-processing/post_process_ids/id11/id_11_phase_1.py create mode 100644 DRAW-post-processing/post_process_ids/id11/id_11_phase_3.py create mode 100644 DRAW-post-processing/post_process_ids/id12/id_12_phase_1.py create mode 100644 DRAW-post-processing/post_process_ids/id12/id_12_phase_3.py create mode 100644 DRAW-post-processing/post_process_ids/id13/id_13_phase_1.py create mode 100644 DRAW-post-processing/post_process_ids/id13/id_13_phase_3.py create mode 100644 DRAW-post-processing/post_process_ids/id2/id_2_phase_1.py create mode 100644 DRAW-post-processing/post_process_ids/id2/id_2_phase_3.py create mode 100644 DRAW-post-processing/post_process_ids/id8/id_8_phase_1.py create mode 100644 DRAW-post-processing/post_process_ids/id8/id_8_phase_3.py create mode 100644 DRAW-post-processing/post_process_ids/id9/id_9_phase_1.py create mode 100644 DRAW-post-processing/post_process_ids/id9/id_9_phase_3.py diff --git a/DRAW-post-processing/config.py b/DRAW-post-processing/config.py index f4e5e1e..f724840 100644 --- a/DRAW-post-processing/config.py +++ b/DRAW-post-processing/config.py @@ -5,53 +5,60 @@ # assigning post-process ID's to field ID's # assign a TUPLE to multiple field_id's with one PPID; otherwise assign integer for single field_id +# 1: pressure(in_hg), 2: vapour pressure(in_Hg), 3:temperature (F), 4: precipitation (in), 5: direction, 6: velocity (miles), 7: weather, 8: cloud type, 9:time, 10: RH (%), 11: cloud cover (tenths), 12 :various (character) ppid_to_field_id = {1: (4, 6, 7, 8, 67, 69), 2: 14, - 3: (5,9,10,11,12,13,36,37,38,39,63,64,68,76,77,78,79), - 4: (25, 26, 27, 28, 29, 30, 31, 50, 70, 71), - 5: (16, 17, 22, 23, 24, 53, 54), - 6: (19, 20, 21, 34, 35, 48, 61, 85), - 7: (18, 40, 41, 44, 51, 52, 56, 57, 66, 80, 82, 83)} - -sef_type_to_field_id ={ "atb": (5,68), - "au": (47), - "cl":(22,53), - "cd":(23), - "ch":(16,54), - "dd":(19), - "e":(14), - "hd":(17), - "mslp":(7), - "nl":(24), - "p": (4,67), - "p_cor":(7,69), - "pr":(27,31), - "ptb":(25,28), - "pte":(26,29), - "rh":(15,58,59,73,75,60,72), - "rrt":(66), - "sd":(50), - "rain_dur":(70), - "snow_dur":(71), - "ss":(65), - "ta":9, - "ta_cor":(10), - "tb":(11), - "tb_cor":(12), - "td":(33), - "TGn":(62,81), - "Tn":(38,76), - "Tn_cor":(37,77), - "Tx":(36,78), - "Tx_cor":(37,79), - "Tsx":(63,64), - "wf":(85), - "w":(34,48,35), - "ws":(20), - "ww":(18,52,40,44,57,56,80,83), - "w2":(51,41,82) - } -sef_type_to_unit={ "atb": "C", + 3: (5, 9, 10, 11, 12, 13, 33, 36, 37, 38, 39, 62, 63, 64, 68, 76, 77, 78, 79, 81), + 4: (27, 30, 31, 50), + 5: (17, 19, 23), + 6: (20), + 7: (18, 40, 41, 44, 51, 52, 56, 57, 66, 80, 82, 83), + 8: (16, 22, 53, 54), + 9: (25, 26, 28, 29, 46, 65, 70, 71), + 10: (15, 58, 59, 60, 72, 74, 75), + 11: (24), + 12: (21, 61, 42, 47, 66), + 13: (34, 35, 48)} + +sef_type_to_field_id = {"atb": (5, 68), + "au": (47), + "cl": (22, 53), + "cd": (23), + "ch": (16, 54), + "dd": (19), + "e": (14), + "hd": (17), + "mslp": (7), + "nl": (24), + "p": (4, 67), + "p_cor": (7, 69), + "pr": (27, 31), + "ptb": (25, 28), + "pte": (26, 29), + "rh": (15, 58, 59, 73, 75, 60, 72), + "rrt": (66), + "sd": (50), + "rain_dur": (70), + "snow_dur": (71), + "ss": (65), + "ta": (9), + "ta_cor": (10), + "tb": (11), + "tb_cor": (12), + "td": (33), + "TGn": (62, 81), + "Tn": (38, 76), + "Tn_cor": (37, 77), + "Tx": (36, 78), + "Tx_cor": (37, 79), + "Tsx": (63, 64), + "wf": (85), + "w": (34, 48, 35), + "ws": (20), + "ww": (18, 52, 40, 44, 57, 56, 80, 83), + "w2": (51, 41, 82) + } +sef_type_to_unit={"atb": "C", "au": "text", "cl":"lct", "cd":"dir", @@ -166,23 +173,26 @@ def possible_pressure_formats(value, for_leading_digits): temperature_min = -100.0 temperature_max= { - "5": 100, + "5": 100, "9": 120, "10": 120, - "11": 110, + "11": 110, "12": 110, "13": 110, + "33": 130, "36": 130, "37": 130, "38": 100, "39": 100, + "62": 130, "63": 160, "64": 160, "68": 100, "76": 100, "77": 100, "78": 130, - "79": 130 + "79": 130, + "81": 130 } @@ -203,7 +213,7 @@ def possible_pressure_formats(value, for_leading_digits): temperature_air_wet_bulb=[[9,11,13],[10,12,13]] #same observation time: abs(abs(field[0]-field[1])-abs(field[2])) 0 +def remove_negatives(value, entry): + original_value = value + try: + if ('-' in value): + value=value.replace('-','') + if value != original_value: + tables.add_error_edit_code(1, '020', original_value, value, entry) + except ValueError: + pass + return value + + +# remove any question marks for values with should always be > 0 +def remove_question(value, entry): + original_value = value + try: + if ('?' in value): + value = value.replace('?','') + if value != original_value: + tables.add_error_edit_code(1, '021', original_value, value, entry) + except ValueError: + pass + return value + # removes a set amount of trailing digits from a number (specified by 'number' parameter) def remove_trailing_digits(value, number, entry): @@ -193,6 +218,32 @@ def insert_element_at_index(value, index, element, entry): ##################### CONDITIONAL STATEMENT CHECKS BELOW ##################### +# Checking to see if value is a float, integer or character + +def desired_cloudcover_format(value): + try: + int(value) +# if len(value) == 2: + return True + except ValueError: + return False + except TypeError: + return False + return False + + +def desired_precipitation_format(value): + try: + float(value) +# if len(value) <= 5 and float_decimal_index(value) <= 2: + return True + except ValueError: + return False + except TypeError: + return False + return False + + # Checking to see if raw pressure value is of form XX.XXX def desired_pressure_format(value): try: @@ -205,7 +256,19 @@ def desired_pressure_format(value): return False return False -# Checking to see if temperature value is a float + +def desired_relhum_format(value): + try: + float(value) +# if len(value) == 6 and float_decimal_index(value) == 2: + return True + except ValueError: + return False + except TypeError: + return False + return False + + def desired_temperature_format(value): try: float(value) @@ -217,6 +280,31 @@ def desired_temperature_format(value): return False return False + +def desired_vapourpressure_format(value): + try: + float(value) +# if len(value) <= 4: + return True + except ValueError: + return False + except TypeError: + return False + return False + + +def desired_windvelocity_format(value): + try: + float(value) +# if len(value) == 6 and float_decimal_index(value) == 2: + return True + except ValueError: + return False + except TypeError: + return False + return False + + # returns True if value is of form XX XXX where the space is one of ( '/' ';' ',' '-' ) def pressure_decimal_alternate(value): try: diff --git a/DRAW-post-processing/phase3_methods.py b/DRAW-post-processing/phase3_methods.py index 5988a8f..b89b637 100644 --- a/DRAW-post-processing/phase3_methods.py +++ b/DRAW-post-processing/phase3_methods.py @@ -1,7 +1,20 @@ # -*- coding: utf-8 -*- -def baro_Eng_in2mb(ei): - return float(ei) * 33.86389 +# def baro_Eng_in2mb(ei): +# return float(ei) * 33.86389 -def temp_f2c(tf): - return (5.0/9.0) * (float(tf)-32.0) + +# def temp_f2c(tf): +# return (5.0/9.0) * (float(tf)-32.0) + + +def depth_in2mm(pp): + return (float(pp)*25.4) + + +def vel_mph2mps(vel): + return float(vel*0.44704) + + +def dis_mi2m(dis): + return float(dis*1609.34) diff --git a/DRAW-post-processing/post_process_ids/id1/id_1_phase_1.py b/DRAW-post-processing/post_process_ids/id1/id_1_phase_1.py index bf41a39..2f7915c 100644 --- a/DRAW-post-processing/post_process_ids/id1/id_1_phase_1.py +++ b/DRAW-post-processing/post_process_ids/id1/id_1_phase_1.py @@ -2,6 +2,7 @@ import phase1_methods as methods import tables +# id1 = pressure values def phase_1(entry): @@ -24,13 +25,14 @@ def phase_1(entry): value = methods.correct_double_decimals(value, return_list) value = methods.remove_alphabetical_char(value, return_list) value = methods.remove_unexpected_characters(value, return_list) + value = methods.remove_negatives(value, return_list) + value = methods.remove_question(value, return_list) return_list[1] = value # checking again if pressure value is of form XX.XXX after simple clean-up methods if methods.desired_pressure_format(value): tables.add_to_corrected_table(*return_list, 0) - # TODO : 'Illegible' entries (currently disregarded) # TODO : append error_code in corrected table to any value being passed to said table with value '1' @@ -44,7 +46,7 @@ def phase_1(entry): tables.add_error_edit_code(1, '015', original_value, value, return_list) tables.add_to_corrected_table(*return_list, 0) - # checking and fixing accordingly if pressure value of form 0.XXX, 2.XXX, 3.XXX, or 9.XXX +# checking and fixing accordingly if pressure value of form 0.XXX, 2.XXX, 3.XXX, or 9.XXX elif methods.float_decimal_index(value) == 1: match int(value[0]): case 2: @@ -95,8 +97,6 @@ def phase_1(entry): else: tables.add_error_edit_code(1, '001', value, '', return_list) tables.add_to_corrected_table(*return_list, 1) - - elif len(value) == 6: # value of form XXXXXX: if value.isnumeric(): @@ -121,8 +121,6 @@ def phase_1(entry): else: tables.add_error_edit_code(1, '001', value, '', return_list) tables.add_to_corrected_table(*return_list, 1) - - elif len(value) == 4: # value of form XXXX if value.isnumeric(): diff --git a/DRAW-post-processing/post_process_ids/id1/id_1_phase_2.py b/DRAW-post-processing/post_process_ids/id1/id_1_phase_2.py index df048ff..6867a4a 100644 --- a/DRAW-post-processing/post_process_ids/id1/id_1_phase_2.py +++ b/DRAW-post-processing/post_process_ids/id1/id_1_phase_2.py @@ -1,11 +1,10 @@ # post-processing range/consistency check algorithm for post_process_id = 1 (phase 2) - +# id1 = pressure values import phase2_methods as methods import config import tables import sys - def phase_2(entry, lead_digs_added): return_list = list(entry) value = return_list[1] @@ -18,23 +17,20 @@ def phase_2(entry, lead_digs_added): elif return_list[4] in {4, 6, 7, 8}: try: - resultant_value=methods.equation_resultant_value(entry) + resultant_value = methods.equation_resultant_value(entry) if (resultant_value is not None): diff_equation_transcribed = abs(float(value) - resultant_value) if diff_equation_transcribed >= config.pressure_diff_threshold: - if lead_digs_added: - methods.check_lead_digs_with_equation(diff_equation_transcribed, return_list, lead_digs_added) - + methods.check_lead_digs_with_equation(diff_equation_transcribed, return_list, lead_digs_added) elif not lead_digs_added: if methods.fluctuation_exceeds_normal(return_list): pass # TODO : PASS THROUGH check_other_transcription_errors() pass # TODO : PASS THROUGH check_other_transcription_errors() - else: tables.add_to_final_corrected_table(*return_list) pass # TODO : for when difference is not great, but can try to pick out possible smaller errors else: tables.add_to_final_corrected_table(*return_list) except: - print(value,methods.equation_resultant_value(entry),file=sys.stderr) \ No newline at end of file + print(value,methods.equation_resultant_value(entry),file=sys.stderr) diff --git a/DRAW-post-processing/post_process_ids/id1/id_1_phase_3.py b/DRAW-post-processing/post_process_ids/id1/id_1_phase_3.py index 008056d..cf5390d 100644 --- a/DRAW-post-processing/post_process_ids/id1/id_1_phase_3.py +++ b/DRAW-post-processing/post_process_ids/id1/id_1_phase_3.py @@ -1,16 +1,19 @@ import tables -import phase3_methods as methods +# import phase3_methods as methods +import lmrlib +# id1 = pressure values + def phase_3(entry): return_list = list(entry) value = entry[1] - if value!='-999': + if value != '-999': try: - value = '{:.2f}'.format(methods.baro_Eng_in2mb(value)) + value = '{:.2f}'.format(lmrlib.baro_Eng_in2mb(value)) except ValueError: - value=entry[1] + value = entry[1] except TypeError: - value=entry[1] + value = entry[1] return_list[1] = value - tables.add_to_final_corrected_table_iso(*return_list) \ No newline at end of file + tables.add_to_final_corrected_table_iso(*return_list) diff --git a/DRAW-post-processing/post_process_ids/id10/id_10_phase_1.py b/DRAW-post-processing/post_process_ids/id10/id_10_phase_1.py new file mode 100644 index 0000000..9baa110 --- /dev/null +++ b/DRAW-post-processing/post_process_ids/id10/id_10_phase_1.py @@ -0,0 +1,43 @@ +import phase1_methods as methods +import tables +# id 10 = relative humidity + + +def phase_1(entry): + + return_list = list(entry) + value = entry[1] + synonyms_empty = ('empty', 'illegible', 'retracted', 'emptyblank', 'blank') +# single_letters = ('S', 's', 'T', 't', 'N', 'n') #things I noticed that appear by themselves + inaps = ('inappreciable', '?napp', 'napp', 'inap', 'inapp') + + if value != None and value != '': # deal with non-empty entries + value = value.lower() + + if value in synonyms_empty: # if value purposely left empty + return_list[1] = value + tables.add_to_corrected_table(*return_list, 0) + return None + else: + if value in inaps: # to unify values + value = 'inapp' + return_list[1] = value + tables.add_to_corrected_table(*return_list, 0) + return None + else: # general formatting + value = methods.remove_spaces(value, return_list) + value = methods.correct_double_decimals(value, return_list) + value = methods.remove_unexpected_characters(value, return_list) + value = methods.remove_negatives(value, return_list) + value = methods.remove_question(value, return_list) + value = value.replace(";", ".") + return_list[1] = value + if (isinstance(value, (int, float)) == "TRUE"): + tables.add_to_corrected_table(*return_list, 0) + else: + tables.add_error_edit_code(1, '024', value, return_list[1], return_list) + return None + + elif value == None or value == '': # deal with empty entries + tables.add_to_corrected_table(*return_list, 0) + return None diff --git a/DRAW-post-processing/post_process_ids/id10/id_10_phase_3.py b/DRAW-post-processing/post_process_ids/id10/id_10_phase_3.py new file mode 100644 index 0000000..ec56ddf --- /dev/null +++ b/DRAW-post-processing/post_process_ids/id10/id_10_phase_3.py @@ -0,0 +1,10 @@ +import tables +import phase3_methods as methods +# id 10 = relative humidity + +def phase_3(entry): + return_list = list(entry) + value = entry[1] + + return_list[1] = value + tables.add_to_final_corrected_table_iso(*return_list) diff --git a/DRAW-post-processing/post_process_ids/id11/id_11_phase_1.py b/DRAW-post-processing/post_process_ids/id11/id_11_phase_1.py new file mode 100644 index 0000000..666d618 --- /dev/null +++ b/DRAW-post-processing/post_process_ids/id11/id_11_phase_1.py @@ -0,0 +1,43 @@ +import phase1_methods as methods +import tables +# id 11 = cloud cover (tenths) + + +def phase_1(entry): + + return_list = list(entry) + value = entry[1] + synonyms_empty = ('empty', 'illegible', 'retracted', 'emptyblank', 'blank') +# single_letters = ('S', 's', 'T', 't', 'N', 'n') # things I noticed that appear by themselves + inaps = ('inappreciable', '?napp', 'napp', 'inap', 'inapp') + + if value != None and value != '': # deal with non-empty entries + value = value.lower() + + if value in synonyms_empty: # if value purposely left empty + return_list[1] = value + tables.add_to_corrected_table(*return_list, 0) + return None + else: + if value in inaps: # to unify values + value = 0 + return_list[1] = value + tables.add_to_corrected_table(*return_list, 0) + return None + else: # general formatting + value = methods.remove_spaces(value, return_list) + value = methods.correct_double_decimals(value, return_list) + value = methods.remove_unexpected_characters(value, return_list) + value = methods.remove_negatives(value, return_list) + value = methods.remove_question(value, return_list) + value = value.replace(";", ".") + return_list[1] = value + if (isinstance(value, (int, float)) == "TRUE"): + tables.add_to_corrected_table(*return_list, 0) + else: + tables.add_error_edit_code(1, '024', value, return_list[1], return_list) + return None + + elif value == None or value == '': # deal with empty entries + tables.add_to_corrected_table(*return_list, 0) + return None diff --git a/DRAW-post-processing/post_process_ids/id11/id_11_phase_3.py b/DRAW-post-processing/post_process_ids/id11/id_11_phase_3.py new file mode 100644 index 0000000..f876ab8 --- /dev/null +++ b/DRAW-post-processing/post_process_ids/id11/id_11_phase_3.py @@ -0,0 +1,20 @@ +import tables +# import phase3_methods as methods +import lmrlib +# id 11 = cloud cover (tenths) + + +def phase_3(entry): + return_list = list(entry) + value = entry[1] + if value != '-999': + try: + v=float(value) + value = '{:.1f}'.format(lmrlib.cloud_tenthscovered2oktas(v)) + except ValueError: + value = entry[1] + except TypeError: + value = entry[1] + + return_list[1] = value + tables.add_to_final_corrected_table_iso(*return_list) diff --git a/DRAW-post-processing/post_process_ids/id12/id_12_phase_1.py b/DRAW-post-processing/post_process_ids/id12/id_12_phase_1.py new file mode 100644 index 0000000..6a44e94 --- /dev/null +++ b/DRAW-post-processing/post_process_ids/id12/id_12_phase_1.py @@ -0,0 +1,41 @@ +import phase1_methods as methods +import tables +# id 12 = character fields (type of wind, aurora class, etc) + + +def phase_1(entry): + + return_list = list(entry) + value = entry[1] + synonyms_empty = ('empty', 'illegible', 'retracted', 'emptyblank', 'blank') +# single_letters = ('S', 's', 'T', 't', 'N', 'n') #things I noticed that appear by themselves + inaps = ('inappreciable', '?napp', 'napp', 'inap', 'inapp') + + if value != None and value != '': # deal with non-empty entries + value = value.lower() + + if value in synonyms_empty: # if value purposely left empty + return_list[1] = value + tables.add_to_corrected_table(*return_list, 0) + return None + else: + if value in inaps: # to unify values + value = 'inapp' + return_list[1] = value + tables.add_to_corrected_table(*return_list, 0) + return None + else: # general formatting + value = methods.remove_spaces(value, return_list) + value = methods.correct_double_decimals(value, return_list) + value = methods.remove_unexpected_characters(value, return_list) + value = value.replace(";", ".") + return_list[1] = value + if (str.isalpha, value) == "True": + tables.add_to_corrected_table(*return_list, 0) + else: + tables.add_error_edit_code(1, '025', value, return_list[1], return_list) + return None + + elif value == None or value == '': # deal with empty entries + tables.add_to_corrected_table(*return_list, 0) + return None diff --git a/DRAW-post-processing/post_process_ids/id12/id_12_phase_3.py b/DRAW-post-processing/post_process_ids/id12/id_12_phase_3.py new file mode 100644 index 0000000..cc155cc --- /dev/null +++ b/DRAW-post-processing/post_process_ids/id12/id_12_phase_3.py @@ -0,0 +1,11 @@ +import tables +import phase3_methods as methods +# id 12 = character fields (type of wind, aurora class, etc) + + +def phase_3(entry): + return_list = list(entry) + value = entry[1] + + return_list[1] = value + tables.add_to_final_corrected_table_iso(*return_list) diff --git a/DRAW-post-processing/post_process_ids/id13/id_13_phase_1.py b/DRAW-post-processing/post_process_ids/id13/id_13_phase_1.py new file mode 100644 index 0000000..33ac67c --- /dev/null +++ b/DRAW-post-processing/post_process_ids/id13/id_13_phase_1.py @@ -0,0 +1,41 @@ +import phase1_methods as methods +import tables +# id3 = distance (miles), eg distance wind has run +def phase_1(entry): + + return_list = list(entry) + value = entry[1] + synonyms_empty = ('empty', 'illegible', 'retracted', 'emptyblank', 'blank') +# single_letters = ('S', 's', 'T', 't', 'N', 'n') #things I noticed that appear by themselves + inaps = ('inappreciable', '?napp', 'napp', 'inap', 'inapp') + + if value != None and value != '': # deal with non-empty entries + value = value.lower() + if value in synonyms_empty: # if value purposely left empty + return_list[1] = value + tables.add_to_corrected_table(*return_list, 0) + return None + else: + if value in inaps: # to unify values + value = 0 + tables.add_to_corrected_table(*return_list, 0) + return None + else: # general formatting + value = methods.remove_spaces(value, return_list) + value = methods.correct_double_decimals(value, return_list) + value = methods.remove_unexpected_characters(value, return_list) + value = methods.remove_negatives(value, return_list) + value = methods.remove_question(value, return_list) + value = value.replace(";", ".") + return_list[1] = value + try: + v=float(value) + return_list[1]=value + tables.add_to_corrected_table(*return_list, 0) + except ValueError: + tables.add_error_edit_code(1, '024', value, return_list[1], return_list) + return None + + elif value == None or value =='': # deal with empty entries + tables.add_to_corrected_table(*return_list, 0) + return None diff --git a/DRAW-post-processing/post_process_ids/id13/id_13_phase_3.py b/DRAW-post-processing/post_process_ids/id13/id_13_phase_3.py new file mode 100644 index 0000000..6777194 --- /dev/null +++ b/DRAW-post-processing/post_process_ids/id13/id_13_phase_3.py @@ -0,0 +1,23 @@ +import tables +import phase3_methods as methods +# id3 = distance (miles), eg distance wind has run + + +def phase_3(entry): + return_list = list(entry) + value = entry[1] + if value != '-999': + try: + v=float(value)/3600.0 + value = '{:.2f}'.format((methods.dis_mi2m(v))) + if entry[4] == 35: + value = value / (3.0) + elif entry[4] == 48: + value = value / (24.0) + except ValueError: + value = entry[1] + except TypeError: + value = entry[1] + + return_list[1] = value + tables.add_to_final_corrected_table_iso(*return_list) diff --git a/DRAW-post-processing/post_process_ids/id2/id_2_phase_1.py b/DRAW-post-processing/post_process_ids/id2/id_2_phase_1.py new file mode 100644 index 0000000..9cf309f --- /dev/null +++ b/DRAW-post-processing/post_process_ids/id2/id_2_phase_1.py @@ -0,0 +1,69 @@ +import phase1_methods as methods +import tables +# id2 = vapour pressure + + +def phase_1(entry): + ''' + Parameters + ---------- + + Returns + ------- + None. + + ERROR CODE ARE NOT UPLOADED !! + ''' + +# check value >0 +# if value >2, divide by 1000 + + alphabet = 'abcdefghijklmnopqrstuvwxyz' + synonyms_empty = ('empty', 'illegible', 'retracted', 'emptyblank', 'blank') +# single_letters = ('S', 's', 'T', 't', 'N', 'n', 'R', 'r') #things I noticed that appear by themselves + inaps = ('inapp', 'inappreciable', 'inap', '?napp', 'napp') + return_list = list(entry) + value = entry[1] + + if value == None or value == '': # in case missing values + tables.add_to_corrected_table(*return_list, 0) + return None + elif value.lower() in inaps: # to unify inaps value + value = 'inapp' + return_list[1] = value.lower() + tables.add_to_corrected_table(*return_list, 0) + return None + elif value.lower() in synonyms_empty: # if the value was purposely put as problematic + return_list[1] = value.lower() + tables.add_to_corrected_table(*return_list, 0) + return None + else: # Only deals with values with data in them + value = value.lower() + value = value.replace(' ', '') + value = methods.remove_spaces(value, return_list) + value = methods.correct_double_decimals(value, return_list) + value = methods.remove_unexpected_characters(value, return_list) + value = methods.remove_negatives(value, return_list) + value = methods.remove_question(value, return_list) + value = value.replace("-", "") # remove any negative signs + value = value.replace(";", ".") + value = value.replace("'", '.') # to deal with 3'2 +# value = value.replace('°','') +# value = value.replace('~','') + for elements in value: # my way to remove alphabetical characters + if elements in alphabet: + value = value.replace(elements, '') + elif elements in alphabet.upper(): + value = value.replace(elements, '') + + value = value.replace(',', '.') + value = value.replace('/', '0') + value = value.replace('-', '0') + if len(value) > 0: + if value[0] == '.': # to avoid .6 and put 0.6 + value = '0' + value + if value[-1] == '.': # to avoid 6. and put 6.0 + value = value+'0' + return_list[1] = value + tables.add_to_corrected_table(*return_list, 0) + return None diff --git a/DRAW-post-processing/post_process_ids/id2/id_2_phase_3.py b/DRAW-post-processing/post_process_ids/id2/id_2_phase_3.py new file mode 100644 index 0000000..b011ae2 --- /dev/null +++ b/DRAW-post-processing/post_process_ids/id2/id_2_phase_3.py @@ -0,0 +1,20 @@ +import tables +# import phase3_methods as methods +import lmrlib +# id2 = vapour pressure + + +# transform to hPA +def phase_3(entry): + return_list = list(entry) + value = entry[1] + if value != '-999': + try: + value = '{:.2f}'.format(lmrlib.baro_Eng_in2mb(value)) + except ValueError: + value = entry[1] + except TypeError: + value = entry[1] + + return_list[1] = value + tables.add_to_final_corrected_table_iso(*return_list) diff --git a/DRAW-post-processing/post_process_ids/id3/id_3_phase_1.py b/DRAW-post-processing/post_process_ids/id3/id_3_phase_1.py index d0c2515..490d726 100644 --- a/DRAW-post-processing/post_process_ids/id3/id_3_phase_1.py +++ b/DRAW-post-processing/post_process_ids/id3/id_3_phase_1.py @@ -4,8 +4,6 @@ import phase1_methods as methods - - def phase_1(entry): return_list = list(entry) value = entry[1] @@ -19,20 +17,19 @@ def phase_1(entry): elif value is None: tables.add_to_corrected_table(*return_list, 0) - + elif "dry" in value.lower() or "frozen" in value.lower(): - value=methods.extract_decimal(value, return_list) + value = methods.extract_decimal(value, return_list) tables.add_to_corrected_table(*return_list, 0) - # if not of the right form initially, corrects format and returns entry with corrected value +# if not of the right form initially, corrects format and returns entry with corrected value else: - value = methods.remove_spaces(value, return_list) value = methods.correct_double_decimals(value, return_list) value = methods.remove_alphabetical_char(value, return_list) value = methods.remove_unexpected_characters(value, return_list) - value = value.replace(";",".") - + value = methods.remove_question(value, return_list) + value = value.replace(";", ".") return_list[1] = value if methods.desired_temperature_format(value): - tables.add_to_corrected_table(*return_list, 0) \ No newline at end of file + tables.add_to_corrected_table(*return_list, 0) diff --git a/DRAW-post-processing/post_process_ids/id3/id_3_phase_3.py b/DRAW-post-processing/post_process_ids/id3/id_3_phase_3.py index ea46b46..4b167e6 100644 --- a/DRAW-post-processing/post_process_ids/id3/id_3_phase_3.py +++ b/DRAW-post-processing/post_process_ids/id3/id_3_phase_3.py @@ -1,16 +1,21 @@ import tables -import phase3_methods as methods +# import phase3_methods as methods +import lmrlib +# temperature values def phase_3(entry): return_list = list(entry) value = entry[1] - if value!='-999': + if value != '-999': try: - value = '{:.2f}'.format(methods.temp_f2c(value)) + value = '{:.2f}'.format(lmrlib.temp_f2c(float(value))) except ValueError: - value=entry[1] + print ("Value error F to C: "+ entry[1]) + value = entry[1] except TypeError: - value=entry[1] + value = entry[1] + print ("Value error F to C: "+ entry[1]) + return_list[1] = value tables.add_to_final_corrected_table_iso(*return_list) diff --git a/DRAW-post-processing/post_process_ids/id4/id_4_phase_1.py b/DRAW-post-processing/post_process_ids/id4/id_4_phase_1.py index 9f6e35b..d1db264 100644 --- a/DRAW-post-processing/post_process_ids/id4/id_4_phase_1.py +++ b/DRAW-post-processing/post_process_ids/id4/id_4_phase_1.py @@ -1,213 +1,58 @@ import phase1_methods as methods import tables +# id4 = precipitation amount + def phase_1(entry): ''' Parameters ---------- - If the field id is 25, 26, 28 or 29. It represents time. - Here are my assumptions for the time data. The format must be XX:XX. - Example 1: the value is '3', it should be transformed to '03:00' - Example 2: the value is '4':, it should be transformed to '4:00' - Example 3: the value is '12:345', it should be transformed to '12:34' - Example 4: the value is '123', it should be transformed to '1:23' - Example 5: the value is '1234', it should be transformed to '12:34' - Example 6: the value is '1:2', it should be transformed to '1:20' - Example 7: the value is '12:4', it should be transformed to '12:40' - Example 8: the value is '25', it should be transformed to '2:50' - Example 9: the value is '1:8', it should be transformed to '18:00' - Here are examples where it is not possible to transform it: - Example 1: the value is '1:3:00' - Example 2: the value is ':35' - It is assigned '?' in such cases Returns ------- None. - + ERROR CODE ARE NOT UPLOADED !! ''' - - alphabet = 'abcdefghijklmnopqrstuvwxyz' synonyms_empty = ('empty', 'illegible', 'retracted', 'emptyblank', 'blank') - #single_letters = ('S', 's', 'T', 't', 'N', 'n', 'R', 'r') #things I noticed that appear by themselves - inaps = ('inapp', 'inappreciable', 'inap', '?napp', 'napp') +# single_letters = ('S', 's', 'T', 't', 'N', 'n', 'R', 'r') #things I noticed that appear by themselves + inaps = ('inapp', 'inappreciable', 'inap', '?napp', 'napp','Inapp','r') return_list = list(entry) value = entry[1] - - if value == None or value == '': #in case missing values + if value == None or value == '': # in case missing values tables.add_to_corrected_table(*return_list, 0) return None - - elif value.lower() in inaps: # to unify inaps value - value= 'inapp' - return_list[1] = value.lower() + elif value.lower() in inaps: # to unify inaps value + value = 0.01 + return_list[1] = value tables.add_to_corrected_table(*return_list, 0) return None - - elif value.lower() in synonyms_empty: # if the value was purposely put as problematic + + elif value.lower() in synonyms_empty: # if the value was purposely put as problematic return_list[1] = value.lower() tables.add_to_corrected_table(*return_list, 0) return None - - else: #Only deals with values with data in them + + else: # Only deals with values with data in them value = value.lower() - value = value.replace(' ','') + value = value.replace(' ', '') value = methods.remove_spaces(value, return_list) value = methods.correct_double_decimals(value, return_list) value = methods.remove_unexpected_characters(value, return_list) - value = value.replace(";",".") - value=value.replace("'",'.') #to deal with 3'2 - #value = value.replace('°','') - #value = value.replace('~','') - - if entry[4] in (25,26,28,29): #treat time - value=value.replace('.','') - value=value.replace(',','') - value=value.replace('(','') - value=value.replace('&','') - value=value.replace('/','') - value=value.replace('-','') - value=value.replace('_','') - - for elements in value: # my way to remove alphabetical characters - if elements in alphabet: - value=value.replace(elements,'') - elif elements in alphabet.upper(): - value=value.replace(elements,'') + value = methods.remove_negatives(value, return_list) + value = methods.remove_question(value, return_list) + value = value.replace(";", ".") + value = value.replace("'", '.') # to deal with 3'2 +# value = value.replace('°','') +# value = value.replace('~','') - if ':' in value: # transform to correct format - split_data = value.split(':') # to work with data before and after ':' - if len(split_data)>2: # if there are 2 ':' assign ? - value = '?' - return_list[1] = value - tables.add_to_corrected_table(*return_list, 0) - return None - - if len(split_data[0]) == 1: # transform 3:00 to 03:00 - split_data[0] = '0' + split_data[0] - - elif len(split_data[0])>2: # to deal with 210:00, assign ? - value = '?' - return_list[1] = value - tables.add_to_corrected_table(*return_list, 0) - return None - - if len(split_data[1]) == 1: # transform 04:0 to 04:00 - split_data[1] = split_data[1] + '0' - - elif len(split_data[1])>2: # in case of 09:321, assign ? - value = '?' - return_list[1] = value - tables.add_to_corrected_table(*return_list, 0) - return None - value = ':'.join(split_data[:2]) # put back both halves together - if value[:2]=='24': #to transform 24:00 to 00:00 - value = '00'+':'+value[3:] - return_list[1] = value - tables.add_to_corrected_table(*return_list, 0) - return None - - - elif len(value) == 1: # single digit case, i.e 7 to 07:00 - value = '0' + value + ':00' - return_list[1] = value - tables.add_to_corrected_table(*return_list, 0) - return None - - elif len(value) == 2: # two digit case - if int(value[:2])>24: # in case of 32, assign 03:20 - value = '0'+value[0]+':'+value[1]+'0' - return_list[1] = value - tables.add_to_corrected_table(*return_list, 0) - return None - if value[:2]=='24': # avoid 24:00 and put 00:00 - value = '00:00' - return_list[1] = value - tables.add_to_corrected_table(*return_list, 0) - return None - - value = value + ':00' # transform 22 to 22:00 - return_list[1] = value - tables.add_to_corrected_table(*return_list, 0) - return None - - elif len(value) == 3: # three digit case, 320 to 03:20 - value ='0' + value[0] + ':' + value[1:] - return_list[1] = value - tables.add_to_corrected_table(*return_list, 0) - return None - - elif len(value) == 4: # four digit case - if int(value[:2]) > 24: # i.e 4321 should be 04:32 - value = '0'+value[0]+':'+value[1:3] - return_list[1] = value - tables.add_to_corrected_table(*return_list, 0) - return None - elif value[:2]=='24': # avoid 24:00 and put 00:00 - value = '00'+':'+value[2:] - return_list[1] = value - tables.add_to_corrected_table(*return_list, 0) - return None - else: - value = value[:2] + ':' + value[2:] #transform 1621 to 16:21 - return_list[1] = value - tables.add_to_corrected_table(*return_list, 0) - return None - - elif len(value)>4: #12345 - if value[:2]=='24': # avoid 24:00 and put 00:00 - value = '00'+':'+value[2:4] - return_list[1] = value - tables.add_to_corrected_table(*return_list, 0) - return None - else: - value = value[:2]+':'+value[2:4] # cut the last digit - return_list[1] = value - tables.add_to_corrected_table(*return_list, 0) - return None - else: - return_list[1] = value - tables.add_to_corrected_table(*return_list, 0) - return None - - - # else: - #No longer time - value=value.replace(',','.') - value = value.replace('/','0') - value = value.replace('-','0') - if len(value)>0: - if value[0]=='.': # to avoid .6 and put 0.6 - value='0'+value - if value[-1]=='.': # to avoid 6. and put 6.0 - value=value+'0' - return_list[1] = value - tables.add_to_corrected_table(*return_list, 0) + value = value.replace(',', '.') + value = value.replace('/', '0') + value = value.replace('-', '0') + try: + v = float(value) + return_list[1] = value + tables.add_to_corrected_table(*return_list, 0) + except: + tables.add_error_edit_code(1, '024', value, return_list[1], return_list) return None - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/DRAW-post-processing/post_process_ids/id4/id_4_phase_3.py b/DRAW-post-processing/post_process_ids/id4/id_4_phase_3.py index 5b563e5..f23b66a 100644 --- a/DRAW-post-processing/post_process_ids/id4/id_4_phase_3.py +++ b/DRAW-post-processing/post_process_ids/id4/id_4_phase_3.py @@ -1,10 +1,19 @@ import tables import phase3_methods as methods +# id4 = precipitation amount -# no modification made. Just transfer to + +# transform to mm def phase_3(entry): return_list = list(entry) value = entry[1] + try: + v=float(value) + value = '{:.2f}'.format(methods.depth_in2mm(v)) + except ValueError: + value = entry[1] + except TypeError: + value = entry[1] return_list[1] = value - tables.add_to_final_corrected_table_iso(*return_list) \ No newline at end of file + tables.add_to_final_corrected_table_iso(*return_list) diff --git a/DRAW-post-processing/post_process_ids/id5/id_5_phase_1.py b/DRAW-post-processing/post_process_ids/id5/id_5_phase_1.py index d5aca84..13a0098 100644 --- a/DRAW-post-processing/post_process_ids/id5/id_5_phase_1.py +++ b/DRAW-post-processing/post_process_ids/id5/id_5_phase_1.py @@ -1,10 +1,10 @@ import phase1_methods as methods import tables +# id5 = direction + def phase_1(entry): ''' - - Parameters ---------- entry : TYPE @@ -13,55 +13,33 @@ def phase_1(entry): Returns ------- None. - ''' return_list = list(entry) value = entry[1] - alphabet = 'abcdefghijklmnopqrstuvwxyz' synonyms_empty = ('empty', 'illegible', 'retracted', 'emptyblank', 'blank') - #single_letters = ('S', 's', 'T', 't', 'N', 'n') #things I noticed that appear by themselves +# single_letters = ('S', 's', 'T', 't', 'N', 'n') #things I noticed that appear by themselves inaps = ('inappreciable', '?napp', 'napp', 'inap', 'inapp') - if value != None and value != '': #in case non-empty values + if value != None and value != '': # in case non-empty values value = value.lower() - value = value.replace(' ','') - if value in synonyms_empty: # only treat in case the data is empty - return_list[1] = value - tables.add_to_corrected_table(*return_list, 0) - return None - else: # value not empty - if value in inaps: # to unify results that are inaps - value= 'inapp' + value = value.replace(' ', '') + if (str.isalpha, value) == 'True': + if value in synonyms_empty: # only treat in case the data is empty return_list[1] = value tables.add_to_corrected_table(*return_list, 0) + elif value == None or value == '': # in case empty values return None - - if entry[4]==24: # special treatment for field_id 24 as value is a number - value = methods.remove_spaces(value, return_list) - value = methods.correct_double_decimals(value, return_list) - value = methods.remove_unexpected_characters(value, return_list) - value = value.replace(";",".") - for elements in value: # my way to remove alphabetical characters - if elements in alphabet: - value=value.replace(elements,'') - elif elements in alphabet.upper(): - value=value.replace(elements,'') - - else: #deals with the rest of field ids - value = methods.remove_spaces(value, return_list) - value = methods.correct_double_decimals(value, return_list) - value = methods.remove_unexpected_characters(value, return_list) - value = value.replace(";",".") - counter=0 - for elem in value: # if only numbers in a value where there should at least have a letter, put ? - if elem in alphabet or elem in alphabet.upper(): - counter+=1 - if counter==0: - value='?' - - return_list[1] = value - tables.add_to_corrected_table(*return_list, 0) - return None - - elif value == None or value=='': #in case empty values - tables.add_to_corrected_table(*return_list, 0) - return None \ No newline at end of file + else: # value not empty + if value in inaps: # to unify results that are inaps + value = 'inapp' + return_list[1] = value + tables.add_to_corrected_table(*return_list, 0) + return None + else: # deals with the rest of field ids + value = methods.remove_spaces(value, return_list) + value = methods.remove_question(value, return_list) + value = value.replace(";", ".") + return_list[1] = value + tables.add_to_corrected_table(*return_list, 0) + return None + else: + tables.add_error_edit_code(1, '025', value, return_list[1], return_list) diff --git a/DRAW-post-processing/post_process_ids/id5/id_5_phase_3.py b/DRAW-post-processing/post_process_ids/id5/id_5_phase_3.py index 1fa7873..a4e682c 100644 --- a/DRAW-post-processing/post_process_ids/id5/id_5_phase_3.py +++ b/DRAW-post-processing/post_process_ids/id5/id_5_phase_3.py @@ -1,9 +1,26 @@ import tables -import phase3_methods as methods +# import phase3_methods as methods +import lmrlib +import string +# id5 = direction +# transform to degrees def phase_3(entry): return_list = list(entry) - value = entry[1] + try: + value = entry[1] + v=float(value) + if value != '-999': + value = '{:<4}'.format(v) + dc = 1 + imiss = -999 + value = '{:.2f}'.format(lmrlib.wind_4chardir2deg(value, dc, imiss)) + except ValueError: + value = entry[1] + except TypeError: + value = entry[1] + except KeyError: + value='-999' return_list[1] = value - tables.add_to_final_corrected_table_iso(*return_list) \ No newline at end of file + tables.add_to_final_corrected_table_iso(*return_list) diff --git a/DRAW-post-processing/post_process_ids/id6/id_6_phase_1.py b/DRAW-post-processing/post_process_ids/id6/id_6_phase_1.py index 1011e36..ee5909b 100644 --- a/DRAW-post-processing/post_process_ids/id6/id_6_phase_1.py +++ b/DRAW-post-processing/post_process_ids/id6/id_6_phase_1.py @@ -1,72 +1,50 @@ import phase1_methods as methods import tables +# id 6 = velocity + def phase_1(entry): synonyms_empty = ('empty', 'illegible', 'retracted', 'emptyblank', 'blank') - #single_letters = ('S', 's', 'T', 't', 'N', 'n') #things I noticed that appear by themselves +# single_letters = ('S', 's', 'T', 't', 'N', 'n') #things I noticed that appear by themselves inaps = ('inappreciable', '?napp', 'napp', 'inap', 'inapp') alphabet = 'abcdefghijklmnopqrstuvwxyz' return_list = list(entry) value = entry[1] - if value != None and value != '': #in case non-empty value + if value != None and value != '': # in case non-empty value value = value.lower() - value = value.replace(' ','') - if value in synonyms_empty: # only treat in case the data is purposely empty + value = value.replace(' ', '') + if value in synonyms_empty: # only treat in case the data is purposely empty + return_list[1] = value + tables.add_to_corrected_table(*return_list, 0) + return None + else: + if value in inaps: # to unify results + value = 'inapp' return_list[1] = value tables.add_to_corrected_table(*return_list, 0) return None - else: - - if value in inaps: # to unify results - value= 'inapp' - return_list[1] = value - tables.add_to_corrected_table(*return_list, 0) - return None - - if entry[4]==35 or entry[4]==48: #special treatment for field_id 35\48 as value is a number + else: value = methods.remove_spaces(value, return_list) value = methods.correct_double_decimals(value, return_list) value = methods.remove_unexpected_characters(value, return_list) - value = value.replace(";",".") - for elements in value: # my way to remove alphabetical characters + value = methods.remove_negatives(value, return_list) + value = methods.remove_question(value, return_list) + value = value.replace('.', '') # deal with double '.' + value = value.replace(";", ".") + for elements in value: # my way to remove alphabetical characters if elements in alphabet: - value=value.replace(elements,'') + value = value.replace(elements, '') elif elements in alphabet.upper(): - value=value.replace(elements,'') + value = value.replace(elements, '') return_list[1] = value - tables.add_to_corrected_table(*return_list, 0) - return None - - elif entry[4]==19: # bug sometimes two entries with same id. Only one id is transformed - counter=0 - for elem in value: # if only numbers in a value where there should at least have a letter, put ? - if elem in alphabet or elem in alphabet.upper(): - counter+=1 - if counter==0: - value='?' - return_list[1] = value - tables.add_to_corrected_table(*return_list, 0) - return None - - elif entry[4]==34: # this entry is number - value=value.replace('.','') # deal with double '.' - if len(value)==3: # 123 becomes 1.23 - value=value[0]+'.'+value[1:] - elif len(value)==4: # 1234 becomes 12.34 - value=value[:2]+'.'+value[2:] - return_list[1] = value - tables.add_to_corrected_table(*return_list, 0) - return None - - else: # general formatting - value = methods.remove_spaces(value, return_list) - value = methods.correct_double_decimals(value, return_list) - value = methods.remove_unexpected_characters(value, return_list) - value = value.replace(";",".") - return_list[1] = value - tables.add_to_corrected_table(*return_list, 0) + try: + v=float(value) + return_list[1]=value + tables.add_to_corrected_table(*return_list, 0) + except ValueError: + tables.add_error_edit_code(1, '024', value, return_list[1], return_list) return None - - elif value == None or value=='': # deal with empty values + elif value == None or value == '': # deal with empty values + return_list[1]='empty' tables.add_to_corrected_table(*return_list, 0) - return None \ No newline at end of file + return None diff --git a/DRAW-post-processing/post_process_ids/id6/id_6_phase_3.py b/DRAW-post-processing/post_process_ids/id6/id_6_phase_3.py index 1fa7873..0827371 100644 --- a/DRAW-post-processing/post_process_ids/id6/id_6_phase_3.py +++ b/DRAW-post-processing/post_process_ids/id6/id_6_phase_3.py @@ -1,9 +1,19 @@ import tables import phase3_methods as methods +# id 6 = velocity + def phase_3(entry): return_list = list(entry) value = entry[1] + if value != '-999': + try: + v=float(value) + value = '{:.2f}'.format((methods.vel_mph2mps(v))) + except ValueError: + value = entry[1] + except TypeError: + value = entry[1] return_list[1] = value - tables.add_to_final_corrected_table_iso(*return_list) \ No newline at end of file + tables.add_to_final_corrected_table_iso(*return_list) diff --git a/DRAW-post-processing/post_process_ids/id7/id_7_phase_1.py b/DRAW-post-processing/post_process_ids/id7/id_7_phase_1.py index 16d5d8c..3fa01fb 100644 --- a/DRAW-post-processing/post_process_ids/id7/id_7_phase_1.py +++ b/DRAW-post-processing/post_process_ids/id7/id_7_phase_1.py @@ -1,36 +1,34 @@ import phase1_methods as methods import tables +# id 7 = weather fields -def phase_1(entry): +def phase_1(entry): return_list = list(entry) value = entry[1] synonyms_empty = ('empty', 'illegible', 'retracted', 'emptyblank', 'blank') - # single_letters = ('S', 's', 'T', 't', 'N', 'n') #things I noticed that appear by themselves +# single_letters = ('S', 's', 'T', 't', 'N', 'n') #things I noticed that appear by themselves inaps = ('inappreciable', '?napp', 'napp', 'inap', 'inapp') - - if value != None and value != '': # deal with non-empty entries + + if value != None and value != '': # deal with non-empty entries value = value.lower() - - if value in synonyms_empty: #if value purposely left empty - return_list[1] = value - tables.add_to_corrected_table(*return_list, 0) - return None - else: - if value in inaps: # to unify values - value= 'inapp' + if (str.isalpha, value) == 'True': + if value in synonyms_empty: # if value purposely left empty return_list[1] = value tables.add_to_corrected_table(*return_list, 0) return None - else: # general formatting - value = methods.remove_spaces(value, return_list) - value = methods.correct_double_decimals(value, return_list) - value = methods.remove_unexpected_characters(value, return_list) - value = value.replace(";",".") - return_list[1] = value - tables.add_to_corrected_table(*return_list, 0) - return None - - elif value == None or value=='': # deal with empty entries + else: + if value in inaps: # to unify values + value = 'inapp' + return_list[1] = value + tables.add_to_corrected_table(*return_list, 0) + return None + else: # general formatting + value = methods.remove_spaces(value, return_list) + value = methods.remove_unexpected_characters(value, return_list) + return_list[1] = value + tables.add_to_corrected_table(*return_list, 0) + return None + elif value == None or value == '': # deal with empty entries tables.add_to_corrected_table(*return_list, 0) - return None \ No newline at end of file + return None diff --git a/DRAW-post-processing/post_process_ids/id7/id_7_phase_3.py b/DRAW-post-processing/post_process_ids/id7/id_7_phase_3.py index 1fa7873..218c0ab 100644 --- a/DRAW-post-processing/post_process_ids/id7/id_7_phase_3.py +++ b/DRAW-post-processing/post_process_ids/id7/id_7_phase_3.py @@ -1,9 +1,11 @@ import tables import phase3_methods as methods +# id 7 = weather fields + def phase_3(entry): return_list = list(entry) value = entry[1] return_list[1] = value - tables.add_to_final_corrected_table_iso(*return_list) \ No newline at end of file + tables.add_to_final_corrected_table_iso(*return_list) diff --git a/DRAW-post-processing/post_process_ids/id8/id_8_phase_1.py b/DRAW-post-processing/post_process_ids/id8/id_8_phase_1.py new file mode 100644 index 0000000..f02c92e --- /dev/null +++ b/DRAW-post-processing/post_process_ids/id8/id_8_phase_1.py @@ -0,0 +1,42 @@ +import phase1_methods as methods +import tables +# id8 = cloud type + + +def phase_1(entry): + + return_list = list(entry) + value = entry[1] + synonyms_empty = ('empty', 'illegible', 'retracted', 'emptyblank', 'blank') +# single_letters = ('S', 's', 'T', 't', 'N', 'n') #things I noticed that appear by themselves + inaps = ('inappreciable', '?napp', 'napp', 'inap', 'inapp') + + if value != None and value != '': # deal with non-empty entries + value = value.lower() + if (str.isalpha, value) == 'True': + if value in synonyms_empty: # if value purposely left empty + return_list[1] = value + tables.add_to_corrected_table(*return_list, 0) + return None + else: + if value in inaps: # to unify values + value = 'inapp' + return_list[1] = value + tables.add_to_corrected_table(*return_list, 0) + return None + else: # general formatting + value = methods.remove_spaces(value, return_list) + value = methods.correct_double_decimals(value, return_list) + value = methods.remove_unexpected_characters(value, return_list) + value = methods.remove_negatives(value, return_list) + value = methods.remove_question(value, return_list) + value = value.replace(";", ".") + return_list[1] = value + tables.add_to_corrected_table(*return_list, 0) + else: + tables.add_error_edit_code(1, '025', value, return_list[1], return_list) + return None + + elif value == None or value == '': # deal with empty entries + tables.add_to_corrected_table(*return_list, 0) + return None diff --git a/DRAW-post-processing/post_process_ids/id8/id_8_phase_3.py b/DRAW-post-processing/post_process_ids/id8/id_8_phase_3.py new file mode 100644 index 0000000..61ce557 --- /dev/null +++ b/DRAW-post-processing/post_process_ids/id8/id_8_phase_3.py @@ -0,0 +1,11 @@ +import tables +import phase3_methods as methods +# id8 = cloud type + + +def phase_3(entry): + return_list = list(entry) + value = entry[1] + + return_list[1] = value + tables.add_to_final_corrected_table_iso(*return_list) diff --git a/DRAW-post-processing/post_process_ids/id9/id_9_phase_1.py b/DRAW-post-processing/post_process_ids/id9/id_9_phase_1.py new file mode 100644 index 0000000..77c23ad --- /dev/null +++ b/DRAW-post-processing/post_process_ids/id9/id_9_phase_1.py @@ -0,0 +1,275 @@ +import phase1_methods as methods +import tables +# id9 = time fields + +def phase_1(entry): + ''' + Parameters + ---------- + If the field id is 25, 26, 28 or 29. It represents time. + Here are my assumptions for the time data. The format must be XX:XX. + Example 1: the value is '3', it should be transformed to '03:00' + Example 2: the value is '4':, it should be transformed to '4:00' + Example 3: the value is '12:345', it should be transformed to '12:34' + Example 4: the value is '123', it should be transformed to '1:23' + Example 5: the value is '1234', it should be transformed to '12:34' + Example 6: the value is '1:2', it should be transformed to '1:20' + Example 7: the value is '12:4', it should be transformed to '12:40' + Example 8: the value is '25', it should be transformed to '2:50' + Example 9: the value is '1:8', it should be transformed to '18:00' + Here are examples where it is not possible to transform it: + Example 1: the value is '1:3:00' + Example 2: the value is ':35' + It is assigned '?' in such cases + Returns + ------- + None. + + ERROR CODE ARE NOT UPLOADED !! + + ''' + + alphabet = 'abcdefghijklmnopqrstuvwxyz' + synonyms_empty = ('empty', 'illegible', 'retracted', 'emptyblank', 'blank') + #single_letters = ('S', 's', 'T', 't', 'N', 'n', 'R', 'r') #things I noticed that appear by themselves + inaps = ('inapp', 'inappreciable', 'inap', '?napp', 'napp') + return_list = list(entry) + value = entry[1] + + if value == None or value == '': #in case missing values + tables.add_to_corrected_table(*return_list, 0) + return None + + elif value.lower() in inaps: # to unify inaps value + value= 'inapp' + return_list[1] = value.lower() + tables.add_to_corrected_table(*return_list, 0) + return None + + elif value.lower() in synonyms_empty: # if the value was purposely put as problematic + return_list[1] = value.lower() + tables.add_to_corrected_table(*return_list, 0) + return None + + else: #Only deals with values with data in them + value = value.lower() + value = value.replace(' ','') + value = methods.remove_spaces(value, return_list) + value = methods.correct_double_decimals(value, return_list) + value = methods.remove_unexpected_characters(value, return_list) + value = value.replace(";",".") + value=value.replace("'",'.') #to deal with 3'2 + #value = value.replace('°','') + #value = value.replace('~','') + + if entry[4] in (25,26,28,29): #treat time + value=value.replace('.','') + value=value.replace(',','') + value=value.replace('(','') + value=value.replace('&','') + value=value.replace('/','') + value=value.replace('-','') + value=value.replace('_','') + + for elements in value: # my way to remove alphabetical characters + if elements in alphabet: + value=value.replace(elements,'') + elif elements in alphabet.upper(): + value=value.replace(elements,'') + + if ':' in value: # transform to correct format + split_data = value.split(':') # to work with data before and after ':' + if len(split_data)>2: # if there are 2 ':' assign ? + value = '?' + return_list[1] = value + tables.add_to_corrected_table(*return_list, 0) + return None + + if len(split_data[0]) == 1: # transform 3:00 to 03:00 + split_data[0] = '0' + split_data[0] + + elif len(split_data[0])>2: # to deal with 210:00, assign ? + value = '?' + return_list[1] = value + tables.add_to_corrected_table(*return_list, 0) + return None + + if len(split_data[1]) == 1: # transform 04:0 to 04:00 + split_data[1] = split_data[1] + '0' + + elif len(split_data[1])>2: # in case of 09:321, assign ? + value = '?' + return_list[1] = value + tables.add_to_corrected_table(*return_list, 0) + return None + value = ':'.join(split_data[:2]) # put back both halves together + if value[:2]=='24': #to transform 24:00 to 00:00 + value = '00'+':'+value[3:] + return_list[1] = value + tables.add_to_corrected_table(*return_list, 0) + return None + + + elif len(value) == 1: # single digit case, i.e 7 to 07:00 + value = '0' + value + ':00' + return_list[1] = value + tables.add_to_corrected_table(*return_list, 0) + return None + + elif len(value) == 2: # two digit case + if int(value[:2])>24: # in case of 32, assign 03:20 + value = '0'+value[0]+':'+value[1]+'0' + return_list[1] = value + tables.add_to_corrected_table(*return_list, 0) + return None + if value[:2]=='24': # avoid 24:00 and put 00:00 + value = '00:00' + return_list[1] = value + tables.add_to_corrected_table(*return_list, 0) + return None + + value = value + ':00' # transform 22 to 22:00 + return_list[1] = value + tables.add_to_corrected_table(*return_list, 0) + return None + + elif len(value) == 3: # three digit case, 320 to 03:20 + value ='0' + value[0] + ':' + value[1:] + return_list[1] = value + tables.add_to_corrected_table(*return_list, 0) + return None + + elif len(value) == 4: # four digit case + if int(value[:2]) > 24: # i.e 4321 should be 04:32 + value = '0'+value[0]+':'+value[1:3] + return_list[1] = value + tables.add_to_corrected_table(*return_list, 0) + return None + elif value[:2]=='24': # avoid 24:00 and put 00:00 + value = '00'+':'+value[2:] + return_list[1] = value + tables.add_to_corrected_table(*return_list, 0) + return None + else: + value = value[:2] + ':' + value[2:] #transform 1621 to 16:21 + return_list[1] = value + tables.add_to_corrected_table(*return_list, 0) + return None + + elif len(value)>4: #12345 + if value[:2]=='24': # avoid 24:00 and put 00:00 + value = '00'+':'+value[2:4] + return_list[1] = value + tables.add_to_corrected_table(*return_list, 0) + return None + else: + value = value[:2]+':'+value[2:4] # cut the last digit + return_list[1] = value + tables.add_to_corrected_table(*return_list, 0) + return None + else: + return_list[1] = value + tables.add_to_corrected_table(*return_list, 0) + return None + + + # else: + #No longer time + value=value.replace(',','.') + value = value.replace('/','0') + value = value.replace('-','0') + if len(value)>0: + if value[0]=='.': # to avoid .6 and put 0.6 + value='0'+value + if value[-1]=='.': # to avoid 6. and put 6.0 + value=value+'0' + return_list[1] = value + tables.add_to_corrected_table(*return_list, 0) + return None + + +# if ':' in value: # transform to correct format +# split_data = value.split(':') # to work with data before and after ':' +# if len(split_data) > 2: # if there are 2 ':' assign ? +# value = '' +# return_list[1] = value +# tables.add_to_corrected_table(*return_list, 0) +# return None +# if len(split_data[0]) == 1: # transform 3:00 to 03:00 +# split_data[0] = '0' + split_data[0] +# elif len(split_data[0]) > 2: # to deal with 210:00, assign ? +# value = '?' +# return_list[1] = value +# tables.add_to_corrected_table(*return_list, 0) +# return None +# if len(split_data[1]) == 1: # transform 04:0 to 04:00 +# split_data[1] = split_data[1] + '0' +# elif len(split_data[1]) > 2: # in case of 09:321, assign ? +# value = '?' +# return_list[1] = value +# tables.add_to_corrected_table(*return_list, 0) +# return None +# value = ':'.join(split_data[:2]) # put back both halves together +# if value[:2] == '24': # to transform 24:00 to 00:00 +# value = '00'+':'+value[3:] +# return_list[1] = value +# tables.add_to_corrected_table(*return_list, 0) +# return None +# elif len(value) == 1: # single digit case, i.e 7 to 07:00 +# value = '0' + value + ':00' +# return_list[1] = value +# tables.add_to_corrected_table(*return_list, 0) +# return None +# elif len(value) == 2: # two digit case +# if int(value[:2]) > 24: # in case of 32, assign 03:20 +# value = '0'+value[0]+':'+value[1]+'0' +# return_list[1] = value +# tables.add_to_corrected_table(*return_list, 0) +# return None +# if value[:2] == '24': # avoid 24:00 and put 00:00 +# value = '00:00' +# return_list[1] = value +# tables.add_to_corrected_table(*return_list, 0) +# return None +# value = value + ':00' # transform 22 to 22:00 +# return_list[1] = value +# tables.add_to_corrected_table(*return_list, 0) +# return None +# elif len(value) == 3: # three digit case, 320 to 03:20 +# value = '0' + value[0] + ':' + value[1:] +# return_list[1] = value +# tables.add_to_corrected_table(*return_list, 0) +# return None +# elif len(value) == 4: # four digit case +# if int(value[:2]) > 24: # i.e 4321 should be 04:32 +# value = '0'+value[0]+':'+value[1:3] +# return_list[1] = value +# tables.add_to_corrected_table(*return_list, 0) +# return None +# elif value[:2] == '24': # avoid 24:00 and put 00:00 +# value = '00' + ':' + value[2:] +# return_list[1] = value +# tables.add_to_corrected_table(*return_list, 0) +# return None +# else: +# value = value[:2] + ':' + value[2:] # transform 1621 to 16:21 +# return_list[1] = value +# tables.add_to_corrected_table(*return_list, 0) +# return None +# elif len(value) > 4: # 12345 +# if value[:2] == '24': # avoid 24:00 and put 00:00 +# value = '00' + ':' + value[2:4] +# return_list[1] = value +# tables.add_to_corrected_table(*return_list, 0) +# return None +# else: +# value = value[:2] + ':' + value[2:4] # cut the last digit +# return_list[1] = value +# tables.add_to_corrected_table(*return_list, 0) +# return None +# else: +# return_list[1] = value +# tables.add_to_corrected_table(*return_list, 0) +# return None +# # else: +# # No longer time diff --git a/DRAW-post-processing/post_process_ids/id9/id_9_phase_3.py b/DRAW-post-processing/post_process_ids/id9/id_9_phase_3.py new file mode 100644 index 0000000..cb611a6 --- /dev/null +++ b/DRAW-post-processing/post_process_ids/id9/id_9_phase_3.py @@ -0,0 +1,11 @@ +import tables +import phase3_methods as methods +# id9 = time fields + + +def phase_3(entry): + return_list = list(entry) + value = entry[1] + + return_list[1] = value + tables.add_to_final_corrected_table_iso(*return_list) diff --git a/DRAW-post-processing/sql_commands.py b/DRAW-post-processing/sql_commands.py index 98aa43e..fdd19e8 100644 --- a/DRAW-post-processing/sql_commands.py +++ b/DRAW-post-processing/sql_commands.py @@ -109,7 +109,7 @@ def ref_adjacent_fluctuations(entry, obs_datetime): field_id = entry[4] return phase_1_data_sql[:len(phase_1_data_sql) - 1] + " WHERE field_id = {} " \ "AND date(observation_date)='{}' order by observation_date asc;".format(field_id, str(obs_datetime)[:10]) - + # retrieves relevant field_id's in ledger sheet, to calculate particular field_id based on other two elements, using equation 1, 2 oe 3 (PHASE 2) def equation_retrieve_row(entry, equation_num): diff --git a/DRAW-post-processing/tables.py b/DRAW-post-processing/tables.py index 287fdc4..f590c89 100644 --- a/DRAW-post-processing/tables.py +++ b/DRAW-post-processing/tables.py @@ -74,7 +74,7 @@ def create_final_corrected_table(continue_flag): # creates 'data_entries_corrected_final_iso' table for post-phase 2 processed data iso transformation def create_final_corrected_table_iso(continue_flag): if continue_flag is False: - cursor.execute("DROP TABLE IF EXISTS data_entries_corrected_final_iso;") + cursor.execute("TRUNCATE TABLE data_entries_corrected_final_iso;") create_table = "CREATE TABLE data_entries_corrected_final_iso AS SELECT * FROM data_entries_corrected_final LIMIT 0;" cursor.execute(create_table) @@ -96,7 +96,7 @@ def add_to_corrected_table(entry_id, value, user_id, page_id, field_id, field_ke def populate_corrected_table(): sql_command = "INSERT INTO data_entries_corrected " \ "(id, value, user_id, page_id, field_id, field_key, annotation_id, transcription_id, post_process_id, observation_date, flagged) " \ - "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);" + "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);" cursor.executemany(sql_command, corrected_table) db_conn.commit() @@ -111,7 +111,7 @@ def add_to_final_corrected_table_iso(entry_id, value, user_id, page_id, field_id def populate_final_corrected_table_iso(): sql_command = "INSERT INTO data_entries_corrected_final_iso " \ "(id, value, user_id, page_id, field_id, field_key, annotation_id, transcription_id, post_process_id, observation_date, flagged) " \ - "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);" + "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);" cursor.executemany(sql_command, final_corrected_table_iso) db_conn.commit() @@ -126,7 +126,7 @@ def add_to_final_corrected_table(entry_id, value, user_id, page_id, field_id, fi def populate_final_corrected_table(): sql_command = "INSERT INTO data_entries_corrected_final " \ "(id, value, user_id, page_id, field_id, field_key, annotation_id, transcription_id, post_process_id, observation_date, flagged) " \ - "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);" + "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);" cursor.executemany(sql_command, final_corrected_table) db_conn.commit() @@ -149,20 +149,20 @@ def populate_error_edit_code(phase): "(id, ORIGINAL_VALUE, CORRECTED_VALUE, error_code, user_id, page_id, field_id, field_key, annotation_id, transcription_id, post_process_id, observation_date, additional_info) " \ "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);".format(phase) - if phase==1: + if phase==1: cursor.executemany(sql_command, phase_1_errors) elif phase==2: # cursor.executemany(sql_command, phase_2_errors) #create data frame from phase 2 errors, instead of above #data frame.to_sql - df_temp=pd.DataFrame(phase_2_errors, + df_temp=pd.DataFrame(phase_2_errors, columns=[ 'id', 'ORIGINAL_VALUE', 'CORRECTED_VALUE', 'error_code', 'user_id', 'page_id', 'field_id', 'field_key', 'annotation_id', 'transcription_id', 'post_process_id', 'observation_date', 'additional_info']) df_temp.to_sql('data_entries_phase_2', db.engine, if_exists='append', index=False) - db_conn.commit() + db_conn.commit() + + + - - - # add reconciled observation entry to duplicateless table (after phase 1) def add_to_duplicateless_table(entry_id, value, user_id, page_id, field_id, field_key, annotation_id, transcription_id, post_process_id, observation_date, flagged): global duplicateless @@ -176,7 +176,7 @@ def populate_duplicateless_table(): "(id, value, user_id, page_id, field_id, field_key, annotation_id, transcription_id, post_process_id, observation_date, flagged) " \ "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);" cursor.executemany(sql_command, duplicateless) - db_conn.commit() + db_conn.commit() # updates duplicateless table - used to update MySQL table during observation reconciliation, before continuing with phase 2 def update_duplicateless_table(value, entry_id):