Skip to content

Commit

Permalink
Removed numerical_attrs reference from Table.
Browse files (browse the repository at this point in the history)
  • Loading branch information
richardwu committed Jun 22, 2019
1 parent 211bab3 commit 5c940a5
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 6 deletions.
3 changes: 1 addition & 2 deletions dataset/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,7 @@ def load_data(self, name, fpath, na_values=None, entity_col=None, src_col=None,

# Load raw CSV file/data into a Postgres table 'name' (param).
self.raw_data = Table(name, Source.FILE, na_values=na_values,
exclude_attr_cols=exclude_attr_cols, fpath=fpath,
numerical_attrs=numerical_attrs)
exclude_attr_cols=exclude_attr_cols, fpath=fpath)

df = self.raw_data.df
# Add _tid_ column to dataset that uniquely identifies an entity.
Expand Down
5 changes: 1 addition & 4 deletions dataset/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@ class Table:
A wrapper class for Dataset Tables.
"""
def __init__(self, name, src, na_values=None, exclude_attr_cols=['_tid_'],
fpath=None, df=None, schema_name=None, table_query=None, db_engine=None,
numerical_attrs=None):
fpath=None, df=None, schema_name=None, table_query=None, db_engine=None):
"""
:param name: (str) name to assign to dataset.
:param na_values: (str or list[str]) values to interpret as NULL.
Expand Down Expand Up @@ -49,8 +48,6 @@ def __init__(self, name, src, na_values=None, exclude_attr_cols=['_tid_'],
# TODO(richardwu): use COPY FROM instead of loading this into memory
self.df = pd.read_csv(fpath, dtype=str, na_values=na_values, encoding='utf-8')

# handle numerical values
numerical_attrs = numerical_attrs or []
# Normalize the dataframe: drop columns that are entirely null, convert values to lowercase strings, and strip whitespace.
for attr in self.df.columns.values:
if self.df[attr].isnull().all():
Expand Down

0 comments on commit 5c940a5

Please sign in to comment.