-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathblack_friday_EDA.py
43 lines (28 loc) · 1.26 KB
/
black_friday_EDA.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import logging
import os
from dataprep.clean import clean_headers, clean_df
from dataprep.eda import create_report
from ydata_profiling import ProfileReport
def eda_pandas_profiling(df, name):
    """Build a profiling report for *df* and save it as HTML under ``reports/``.

    The report is written to ``reports/black_friday_eda_pandas_{name}.html``;
    the ``reports`` directory is created when it does not exist yet.

    This is a best-effort helper: any exception raised while creating the
    directory, building the profile, or writing the file is logged through
    ``logging.error`` and then swallowed, so the function never raises and
    always returns ``None``.

    Args:
        df: Data handed to ``ProfileReport``.
        name: Suffix that distinguishes this report's output file name.
    """
    try:
        # The target directory has to exist before the report is written.
        os.makedirs("reports", exist_ok=True)
        print("Generating Pandas Profiling report...")
        report = ProfileReport(
            df,
            title="Black Friday EDA - Pandas Profiling",
            explorative=True,
        )
        destination = f"reports/black_friday_eda_pandas_{name}.html"
        report.to_file(destination)
        print(f"Pandas Profiling report saved at {destination}")
    except Exception as err:
        # Deliberately broad: a failed report must not abort the caller.
        logging.error(f"Error generating Pandas Profiling report: {err}")
def eda_dataprep(df, report_name):
    """Clean *df* with DataPrep and save a DataPrep EDA report as HTML.

    The frame is passed through ``clean_headers`` and then ``clean_df``. The
    type of the working object is printed before and after each step, followed
    by the dtype of every column, as a diagnostic aid for the case where a
    cleaning step returns something ``create_report`` cannot consume.

    The report is written to
    ``reports/dataprep_eda_report_{report_name}.html``; the ``reports``
    directory is created when it does not exist yet.

    Args:
        df: Data to clean and profile. Presumably a pandas DataFrame --
            confirm against the caller.
        report_name: Suffix that distinguishes this report's output file name.

    Raises:
        Exception: Nothing is caught here, so any error from the DataPrep
            calls or from writing the report propagates to the caller.
    """
    # Make sure the output directory exists before doing the expensive work,
    # so a missing directory cannot fail the save after the report is built.
    os.makedirs("reports", exist_ok=True)

    print("Before cleaning:", type(df))  # Check type before any processing
    df = clean_headers(df)
    print("After clean_headers:", type(df))  # Should still be a DataFrame

    # clean_df hands back a pair; only the second element is kept as the
    # cleaned frame (the first is presumably a dtype summary -- TODO confirm).
    _, df = clean_df(df)
    print("After clean_df:", type(df))

    # Print each column name and its type
    for col in df.columns:
        print(f"{col}: {df[col].dtype}")
    print(df.dtypes)

    report = create_report(df)  # If df is not a DataFrame, error occurs here
    report.save(f"reports/dataprep_eda_report_{report_name}.html")