-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcode (2).py
More file actions
184 lines (158 loc) · 8.57 KB
/
Copy pathcode (2).py
File metadata and controls
184 lines (158 loc) · 8.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Streamlit app: upload a CSV, explore it, plot it, clean it, and export it.
#
# Streamlit re-executes this script from the top on every widget
# interaction, so anything that must survive a rerun has to come from a
# stateful widget (radio, checkbox, selectbox) rather than from a one-shot
# st.button, which only reads True during the single rerun caused by its
# click.


def _render_plot(draw):
    """Draw one chart on a fresh figure, display it, and release the figure.

    Args:
        draw: Callable taking a matplotlib ``Axes`` and drawing onto it.

    Each chart is rendered in isolation: a failure is reported through
    ``st.error`` and does not stop the charts that follow it.
    """
    fig, ax = plt.subplots()
    try:
        draw(ax)
        st.pyplot(fig)
    except KeyError as ke:
        st.error(f"Invalid key selection for plotting: {ke}")
    except Exception as e:
        st.error(f"An error occurred while generating the plots: {e}")
    finally:
        # pyplot keeps a reference to every figure until it is closed, so
        # without this the figures pile up across reruns.
        plt.close(fig)


def _generate_plots(data, x_axis, y_axis):
    """Render the full set of charts for the selected pair of columns.

    Args:
        data: DataFrame loaded from the uploaded CSV.
        x_axis: Name of the column used on the X axis / as the grouping key.
        y_axis: Name of the column used on the Y axis / as the plotted value.
    """

    def scatter(ax):
        ax.scatter(data[x_axis], data[y_axis], alpha=0.7)
        ax.set_title(f"{y_axis} vs {x_axis} - Scatter Plot")
        ax.set_xlabel(x_axis)
        ax.set_ylabel(y_axis)

    def line(ax):
        ax.plot(data[x_axis], data[y_axis], marker='o')
        ax.set_title(f"{y_axis} vs {x_axis} - Line Plot")
        ax.set_xlabel(x_axis)
        ax.set_ylabel(y_axis)

    def bar(ax):
        data.groupby(x_axis)[y_axis].mean().plot.bar(ax=ax)
        ax.set_title(f"Average {y_axis} by {x_axis} - Bar Plot")

    def histogram(ax):
        data[y_axis].plot.hist(bins=20, alpha=0.7, ax=ax)
        ax.set_title(f"Histogram of {y_axis}")

    def box(ax):
        sns.boxplot(data=data, x=x_axis, y=y_axis, ax=ax)
        ax.set_title(f"Box Plot of {y_axis} by {x_axis}")

    def violin(ax):
        sns.violinplot(data=data, x=x_axis, y=y_axis, ax=ax)
        ax.set_title(f"Violin Plot of {y_axis} by {x_axis}")

    def heatmap(ax):
        # Restrict to numeric columns: DataFrame.corr() raises on text
        # columns in recent pandas instead of silently dropping them.
        corr = data.select_dtypes(include="number").corr()
        if corr.empty:
            raise ValueError("no numeric columns available for a correlation heatmap")
        sns.heatmap(corr, annot=True, fmt=".2f", cmap="coolwarm", ax=ax)
        ax.set_title("Correlation Heatmap")

    def pie(ax):
        data[x_axis].value_counts().plot.pie(autopct="%1.1f%%", ax=ax)
        ax.set_ylabel("")
        ax.set_title(f"Distribution of {x_axis}")

    def kde(ax):
        sns.kdeplot(data[y_axis], ax=ax)
        ax.set_title(f"KDE Plot of {y_axis}")

    for draw in (scatter, line, bar, histogram, box, violin, heatmap):
        _render_plot(draw)

    # Pairplot (Scatterplot Matrix). seaborn builds its own figure here, so
    # it cannot go through _render_plot; pass that figure to st.pyplot
    # explicitly instead of relying on the global pyplot state.
    try:
        st.write("Scatterplot Matrix")
        grid = sns.pairplot(data, diag_kind='kde')
        try:
            st.pyplot(grid.figure)
        finally:
            plt.close(grid.figure)
    except Exception as e:
        st.error(f"Error generating scatterplot matrix: {e}")

    # Pie Chart (for categorical data) - limit to 10 categories
    if len(data[x_axis].unique()) <= 10:
        _render_plot(pie)

    _render_plot(kde)


# Title and Description
st.title("Data Product GUI with Robust Error Handling")
st.write("Upload a CSV file to explore, clean, and visualize your data.")

try:
    # Data Upload
    st.header("Upload CSV File")
    uploaded_file = st.file_uploader("Choose a CSV file", type=["csv"])

    if uploaded_file is not None:
        try:
            # Read the CSV file
            st.subheader("Raw Data")
            data = pd.read_csv(uploaded_file)
            st.write(data)

            # Check if DataFrame is empty (e.g. a header row with no records)
            if data.empty:
                st.warning("The uploaded CSV is empty. Please provide a valid dataset.")
            else:
                # Data Exploration
                st.header("Data Exploration")

                # Display Summary Statistics
                st.subheader("Summary Statistics")
                try:
                    st.write(data.describe())
                except Exception as e:
                    st.error(f"Error calculating summary statistics: {e}")

                # Column Selection for Visualization
                st.subheader("Data Visualization")
                columns = data.columns.tolist()
                if len(columns) < 2:
                    st.warning("The CSV must have at least two columns for visualization.")
                else:
                    try:
                        x_axis = st.selectbox("Select X-axis", columns, key="x_axis")
                        # Default Y to the second column so the initial
                        # selection is not a column plotted against itself.
                        y_axis = st.selectbox("Select Y-axis", columns, index=1, key="y_axis")

                        # Generate and Display Various Plots
                        if st.button("Generate Plots"):
                            # Individual charts report their own failures;
                            # these handlers are a backstop for anything
                            # raised between charts.
                            try:
                                _generate_plots(data, x_axis, y_axis)
                            except KeyError as ke:
                                st.error(f"Invalid key selection for plotting: {ke}")
                            except Exception as e:
                                st.error(f"An error occurred while generating the plots: {e}")
                    except Exception as e:
                        st.error(f"Error in selecting columns for visualization: {e}")

                # Data Cleaning and Preprocessing
                st.header("Data Cleaning")

                # Handle Missing Values
                st.subheader("Handle Missing Values")
                try:
                    missing_option = st.radio(
                        "Choose how to handle missing values:",
                        ["Do Nothing", "Drop Rows with Missing Values", "Fill with Zero"]
                    )
                    if missing_option == "Drop Rows with Missing Values":
                        data = data.dropna()
                        st.write("Rows with missing values have been removed.")
                        st.write(data)
                    elif missing_option == "Fill with Zero":
                        data = data.fillna(0)
                        st.write("Missing values have been filled with zeros.")
                        st.write(data)
                except Exception as e:
                    st.error(f"An error occurred while handling missing values: {e}")

                # Remove Duplicates
                # A checkbox keeps its value across reruns. A plain button
                # would revert to False on the next interaction, so the
                # de-duplication would never reach the exported file.
                st.subheader("Remove Duplicates")
                if st.checkbox("Remove Duplicates"):
                    try:
                        data = data.drop_duplicates()
                        st.write("Duplicate rows have been removed.")
                        st.write(data)
                    except Exception as e:
                        st.error(f"An error occurred while removing duplicates: {e}")

                # Export Cleaned Data
                # The download button is rendered directly rather than
                # nested inside another button, so one click downloads the
                # data exactly as cleaned above.
                st.subheader("Export Cleaned Data")
                try:
                    # Create CSV in memory
                    csv = data.to_csv(index=False)
                    st.download_button(
                        label="Download Cleaned Data as CSV",
                        data=csv,
                        file_name="cleaned_data.csv",
                        mime="text/csv"
                    )
                except Exception as e:
                    st.error(f"An error occurred while preparing the CSV for download: {e}")

        except pd.errors.EmptyDataError:
            st.error("The uploaded CSV file appears to be empty. Please upload a valid CSV file.")
        except pd.errors.ParserError:
            st.error("There was an error parsing the CSV file. Please ensure the file is properly formatted.")
        except Exception as e:
            st.error(f"An unexpected error occurred while loading the CSV file: {e}")
    else:
        st.write("Please upload a CSV file to proceed.")
except Exception as e:
    # Last-resort boundary so the user sees a message instead of a traceback.
    st.error(f"An unexpected error occurred: {e}")