-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathWriting Functions, Extracting Date Parts.py
148 lines (120 loc) · 5.7 KB
/
Writing Functions, Extracting Date Parts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# -*- coding: utf-8 -*-
"""
Created on Mon Jun 21 10:24:48 2021
@author: Jessica Brown
@position: Economics Intern at MARAD 06/2021 - 07/2021
@personal email: [email protected]
@school email: [email protected]
"""
#Import Pckges
import pandas as pd
#Import txt file
data = "Entrances06062021.txt"
entrances = pd.read_csv(data, delimiter = "|", parse_dates = ["Update Date/Time", "Create Date/Time", "Entrance Date/Time", "Arrival Date/Time"], dtype = {"Filing Port" : str, "Sequence" : str, "Arrival Port" : str, "Agent Name" : str, "Cargo Type Code" : str, "Last Domestic Port" : str, "Last Foreign Port" : str, "Via Port" : str, "Vessel Repair" : str, "Vessel Name" : str, "IMO" : str, "Call Sign Number" : str, "Official Registration Number" : str, "Built Country Code" : str, "Vessel Type" : str, "USCS Code" : str, "Owner Name" : str, "Operator Name" : str, "Operator Country Code" : str, "Registration Country Code" : str})
#Import new txt file
data2 = "Entrances06132021.txt"
entrances2 = pd.read_csv(data2, delimiter = "|", parse_dates = ["Update Date/Time", "Create Date/Time", "Entrance Date/Time", "Arrival Date/Time"], dtype = {"Filing Port" : str, "Sequence" : str, "Arrival Port" : str, "Agent Name" : str, "Cargo Type Code" : str, "Last Domestic Port" : str, "Last Foreign Port" : str, "Via Port" : str, "Vessel Repair" : str, "Vessel Name" : str, "IMO" : str, "Call Sign Number" : str, "Official Registration Number" : str, "Built Country Code" : str, "Vessel Type" : str, "USCS Code" : str, "Owner Name" : str, "Operator Name" : str, "Operator Country Code" : str, "Registration Country Code" : str})
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~Concatenate~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
#Concatenate 06062021 and 06132021
entrances = pd.concat([entrances, entrances2], axis = 0, ignore_index = True)
#Remove duplicates
entrances = entrances.drop_duplicates()
entrances.reset_index(drop = True, inplace = True)
#~~~~~~~~~~~~~~~~~~~~~~~Variables and Organization 1~~~~~~~~~~~~~~~~~~~~~~~~~#
#Assign variables to necessary columns
imo = entrances["IMO"]
edt = entrances["Entrance Date/Time"]
adt = entrances["Arrival Date/Time"]
cdt = entrances["Create Date/Time"]
udt = entrances["Update Date/Time"]
#Modify datetimes
udt_t = pd.to_datetime(udt, format = "%m/%d/%Y %H:%M")
edt_t = pd.to_datetime(edt, format = "%m/%d/%Y %H:%M")
adt_t = pd.to_datetime(adt, format = "%m/%d/%Y %H:%M")
cdt_t = pd.to_datetime(cdt, format = "%m/%d/%Y %H:%M")
#Assign variables to each column
fp = entrances["Filing Port"]
fy = entrances["Fiscal Year"]
s = entrances["Sequence"]
edt = entrances["Entrance Date/Time"]
adt = entrances["Arrival Date/Time"]
cdt = entrances["Create Date/Time"]
udt = entrances["Update Date/Time"]
an = entrances["Agent Name"]
ctc = entrances["Cargo Type Code"]
df = entrances["Draft Feet"]
di = entrances["Draft Inches"]
ldp = entrances["Last Domestic Port"]
lfp = entrances["Last Foreign Port"]
vp = entrances["Via Port"]
tc = entrances["Total Crew"]
tp = entrances["Total Passengers"]
dp = entrances["Disembarking Passengers"]
vr = entrances["Vessel Repair"]
vn = entrances["Vessel Name"]
imo = entrances["IMO"]
csn = entrances["Call Sign Number"]
orn = entrances["Official Registration Number"]
bcc = entrances["Built Country Code"]
yb = entrances["Year Built"]
vt = entrances["Vessel Type"]
uscs = entrances["USCS Code"]
own = entrances["Owner Name"]
opn = entrances["Operator Name"]
occ = entrances["Operator Country Code"]
rcc = entrances["Registration Country Code"]
gt = entrances["Gross Tonnage"]
nt = entrances["Net Tonnage"]
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~Functions~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
#Function basics --
#"def" starts function
#here "measure" is function name
#here "index" is what is passed in the function, any word/variable can be used --
#you can also pass more than one variable -- just use all variables in how function is executed
#I used "return" as the function's purpose is to return certain values
#Combine draft feet and draft inches of a certain vessel into sum in inches
def measure(index):
return int(df.loc[index] * 12) + int(di.loc[index])
#Tells type or category of a vessel -- this is a loop
def category(index):
if int(entrances.loc[index, ["Vessel Type"]]) < 110:
return "Other"
elif int(entrances.loc[index, ["Vessel Type"]]) <= 139:
return "Tanker"
elif int(entrances.loc[index, ["Vessel Type"]]) <= 149:
return "Barge"
elif int(entrances.loc[index, ["Vessel Type"]]) <= 199:
return "Tanker"
elif int(entrances.loc[index, ["Vessel Type"]]) <= 299:
return "Dry Bulk"
elif int(entrances.loc[index, ["Vessel Type"]]) == 310:
return "Container"
elif int(entrances.loc[index, ["Vessel Type"]]) <= 331:
return "General Cargo"
elif int(entrances.loc[index, ["Vessel Type"]]) <= 339:
return "Ro-Ro"
elif int(entrances.loc[index, ["Vessel Type"]]) <= 349:
return "Barge"
elif int(entrances.loc[index, ["Vessel Type"]]) == "Passenger":
return "Passenger"
elif int(entrances.loc[index, ["Vessel Type"]]) == "600":
return "Dry Bulk"
else:
return "Other"
#~~~~~~~~~~~~~~~~~~~~~~~~~~~Extracting Dates~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
#Import pckg
import datetime as dt
#Extract iso year and make year column
entrances["Year"] = adt.dt.isocalendar().year
#Extract month and make year column -- note: isocalendar returns a week number, week day, and year -- no month, so I use the normal month
entrances["Month"] = adt.dt.month
#Extract iso week and make week column
entrances["Week"] = adt.dt.isocalendar().week
#New Variables
y = entrances["Year"]
m = entrances["Month"]
w = entrances["Week"]
#Print series (isolated columns from dataframe) to check
print(y)
print(m)
print(w)