Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -72,16 +72,38 @@ def preprocess_data(data):
|
|
| 72 |
|
| 73 |
return data
|
| 74 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
def perform_analysis(data):
|
| 76 |
st.header("νμμ λ°μ΄ν° λΆμ")
|
| 77 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
# μμ½ ν΅κ³
|
| 79 |
st.write("μμ½ ν΅κ³:")
|
| 80 |
-
st.write(
|
| 81 |
|
| 82 |
# μκ΄κ΄κ³ ννΈλ§΅
|
| 83 |
st.write("μκ΄κ΄κ³ ννΈλ§΅:")
|
| 84 |
-
numeric_data =
|
| 85 |
if not numeric_data.empty:
|
| 86 |
fig = px.imshow(numeric_data.corr(), color_continuous_scale='RdBu_r', zmin=-1, zmax=1)
|
| 87 |
fig.update_layout(title='μκ΄κ΄κ³ ννΈλ§΅')
|
|
@@ -89,28 +111,14 @@ def perform_analysis(data):
|
|
| 89 |
else:
|
| 90 |
st.write("μκ΄κ΄κ³ ννΈλ§΅μ 그릴 μ μλ μ«μν μ΄μ΄ μμ΅λλ€.")
|
| 91 |
|
| 92 |
-
#
|
| 93 |
-
if '
|
| 94 |
-
st.write("
|
| 95 |
-
fig = px.
|
| 96 |
-
fig.update_layout(title='κ³Όλͺ©λ³ νμ΅νκ° μ μ λΆν¬')
|
| 97 |
-
st.plotly_chart(fig)
|
| 98 |
-
|
| 99 |
-
# μλ³ μ μ μΆμ΄
|
| 100 |
-
if 'λ¬' in data.columns and 'νμ΅νκ°' in data.columns:
|
| 101 |
-
st.write("μλ³ μ μ μΆμ΄:")
|
| 102 |
-
fig = px.line(data, x='λ¬', y='νμ΅νκ°', color='κ³Όλͺ©', markers=True)
|
| 103 |
-
fig.update_layout(title='μλ³ νμ΅νκ° μ μ μΆμ΄')
|
| 104 |
-
st.plotly_chart(fig)
|
| 105 |
-
|
| 106 |
-
# μκΈ°λ
Έλ ₯λμ νμ΅νκ° κ΄κ³ (νκ·μ κ³Ό R-squared μΆκ°)
|
| 107 |
-
if 'μκΈ°λ
Έλ ₯λ' in data.columns and 'νμ΅νκ°' in data.columns:
|
| 108 |
-
st.write("μκΈ°λ
Έλ ₯λμ νμ΅νκ° κ΄κ³:")
|
| 109 |
-
fig = px.scatter(data, x='μκΈ°λ
Έλ ₯λ', y='νμ΅νκ°', color='κ³Όλͺ©', hover_data=['λ¬'])
|
| 110 |
|
| 111 |
# μ 체 λ°μ΄ν°μ λν νκ·μ μΆκ°
|
| 112 |
-
x =
|
| 113 |
-
y =
|
| 114 |
slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
|
| 115 |
line_x = np.array([x.min(), x.max()])
|
| 116 |
line_y = slope * line_x + intercept
|
|
@@ -118,7 +126,7 @@ def perform_analysis(data):
|
|
| 118 |
|
| 119 |
r_squared = r_value ** 2
|
| 120 |
fig.update_layout(
|
| 121 |
-
title=f'
|
| 122 |
annotations=[
|
| 123 |
dict(
|
| 124 |
x=0.5,
|
|
@@ -132,79 +140,20 @@ def perform_analysis(data):
|
|
| 132 |
)
|
| 133 |
st.plotly_chart(fig)
|
| 134 |
|
| 135 |
-
#
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
y = filtered_data['νμ΅νκ°']
|
| 150 |
-
slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
|
| 151 |
-
line_x = np.array([x.min(), x.max()])
|
| 152 |
-
line_y = slope * line_x + intercept
|
| 153 |
-
fig.add_trace(go.Scatter(x=line_x, y=line_y, mode='lines', name='νκ·μ '))
|
| 154 |
-
|
| 155 |
-
r_squared = r_value ** 2
|
| 156 |
-
fig.update_layout(
|
| 157 |
-
title=f'μκΈ°λ
Έλ ₯λ {effort_range[0]}-{effort_range[1]} λ²μμ νμ΅νκ° κ΄κ³ (R-squared: {r_squared:.4f})',
|
| 158 |
-
annotations=[
|
| 159 |
-
dict(
|
| 160 |
-
x=0.5,
|
| 161 |
-
y=1.05,
|
| 162 |
-
xref='paper',
|
| 163 |
-
yref='paper',
|
| 164 |
-
text=f'R-squared: {r_squared:.4f}',
|
| 165 |
-
showarrow=False,
|
| 166 |
-
)
|
| 167 |
-
]
|
| 168 |
-
)
|
| 169 |
-
st.plotly_chart(fig)
|
| 170 |
-
|
| 171 |
-
# κ³Όλͺ©λ³ μμΈ λΆμ
|
| 172 |
-
if 'κ³Όλͺ©' in data.columns:
|
| 173 |
-
st.write("κ³Όλͺ©λ³ μμΈ λΆμ:")
|
| 174 |
-
selected_subject = st.selectbox("λΆμν κ³Όλͺ© μ ν", data['κ³Όλͺ©'].unique())
|
| 175 |
-
subject_data = data[data['κ³Όλͺ©'] == selected_subject]
|
| 176 |
-
|
| 177 |
-
if 'λ¬' in subject_data.columns and 'νμ΅νκ°' in subject_data.columns:
|
| 178 |
-
fig = px.line(subject_data, x='λ¬', y='νμ΅νκ°', markers=True)
|
| 179 |
-
fig.update_layout(title=f'{selected_subject} μλ³ νμ΅νκ° μ μ μΆμ΄')
|
| 180 |
-
st.plotly_chart(fig)
|
| 181 |
-
|
| 182 |
-
if 'μκΈ°λ
Έλ ₯λ' in subject_data.columns and 'νμ΅νκ°' in subject_data.columns:
|
| 183 |
-
fig = px.scatter(subject_data, x='μκΈ°λ
Έλ ₯λ', y='νμ΅νκ°', hover_data=['λ¬'])
|
| 184 |
-
|
| 185 |
-
# μ νλ κ³Όλͺ©μ λν νκ·μ μΆκ°
|
| 186 |
-
x = subject_data['μκΈ°λ
Έλ ₯λ']
|
| 187 |
-
y = subject_data['νμ΅νκ°']
|
| 188 |
-
slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
|
| 189 |
-
line_x = np.array([x.min(), x.max()])
|
| 190 |
-
line_y = slope * line_x + intercept
|
| 191 |
-
fig.add_trace(go.Scatter(x=line_x, y=line_y, mode='lines', name='νκ·μ '))
|
| 192 |
-
|
| 193 |
-
r_squared = r_value ** 2
|
| 194 |
-
fig.update_layout(
|
| 195 |
-
title=f'{selected_subject} μκΈ°λ
Έλ ₯λμ νμ΅νκ° κ΄κ³ (R-squared: {r_squared:.4f})',
|
| 196 |
-
annotations=[
|
| 197 |
-
dict(
|
| 198 |
-
x=0.5,
|
| 199 |
-
y=1.05,
|
| 200 |
-
xref='paper',
|
| 201 |
-
yref='paper',
|
| 202 |
-
text=f'R-squared: {r_squared:.4f}',
|
| 203 |
-
showarrow=False,
|
| 204 |
-
)
|
| 205 |
-
]
|
| 206 |
-
)
|
| 207 |
-
st.plotly_chart(fig)
|
| 208 |
|
| 209 |
def main():
|
| 210 |
st.title("μΈν°λν°λΈ EDA ν΄ν·")
|
|
|
|
| 72 |
|
| 73 |
return data
|
| 74 |
|
| 75 |
+
def create_slicers(data):
    """Create a Streamlit multiselect for each low-cardinality categorical column.

    Columns of dtype ``object`` or ``category`` with at most 10 distinct
    non-missing values each get one multiselect widget whose options are the
    column's sorted distinct values, all selected by default.

    Missing values are left out of the option list: ``unique()`` would keep
    NaN, and sorting NaN together with strings raises ``TypeError``.

    Args:
        data: DataFrame whose categorical columns are inspected.

    Returns:
        dict mapping column name to the list of values currently selected in
        that column's widget.
    """
    slicers = {}
    categorical_columns = data.select_dtypes(include=['object', 'category']).columns

    for col in categorical_columns:
        # Only offer a slicer when the column has at most 10 distinct values.
        if data[col].nunique() > 10:
            continue

        values = data[col].dropna().unique()
        try:
            options = sorted(values)
        except TypeError:
            # Mixed, mutually unorderable types (e.g. int and str): order by text form.
            options = sorted(values, key=str)

        if not options:
            # Column holds only missing values; nothing to choose from.
            continue

        slicers[col] = st.multiselect(f"{col} μ ν", options=options, default=options)

    return slicers
|
| 84 |
+
|
| 85 |
+
def apply_slicers(data, slicers):
    """Filter ``data`` down to the rows matching every active slicer.

    Args:
        data: DataFrame to filter.
        slicers: mapping of column name to the values selected for it.
            Entries with an empty (falsy) selection are ignored.

    Returns:
        The rows whose value in each active column is among that column's
        selected values. When no slicer is active, ``data`` itself is
        returned unchanged.
    """
    # Keep only the slicers that actually have something selected.
    active = {column: chosen for column, chosen in slicers.items() if chosen}

    result = data
    for column, chosen in active.items():
        keep = result[column].isin(chosen)
        result = result[keep]
    return result
|
| 90 |
+
|
| 91 |
def perform_analysis(data):
|
| 92 |
st.header("νμμ λ°μ΄ν° λΆμ")
|
| 93 |
|
| 94 |
+
# μ¬λΌμ΄μ μμ±
|
| 95 |
+
slicers = create_slicers(data)
|
| 96 |
+
|
| 97 |
+
# μ¬λΌμ΄μ μ μ©
|
| 98 |
+
filtered_data = apply_slicers(data, slicers)
|
| 99 |
+
|
| 100 |
# μμ½ ν΅κ³
|
| 101 |
st.write("μμ½ ν΅κ³:")
|
| 102 |
+
st.write(filtered_data.describe())
|
| 103 |
|
| 104 |
# μκ΄κ΄κ³ ννΈλ§΅
|
| 105 |
st.write("μκ΄κ΄κ³ ννΈλ§΅:")
|
| 106 |
+
numeric_data = filtered_data.select_dtypes(include=['float64', 'int64'])
|
| 107 |
if not numeric_data.empty:
|
| 108 |
fig = px.imshow(numeric_data.corr(), color_continuous_scale='RdBu_r', zmin=-1, zmax=1)
|
| 109 |
fig.update_layout(title='μκ΄κ΄κ³ ννΈλ§΅')
|
|
|
|
| 111 |
else:
|
| 112 |
st.write("μκ΄κ΄κ³ ννΈλ§΅μ 그릴 μ μλ μ«μν μ΄μ΄ μμ΅λλ€.")
|
| 113 |
|
| 114 |
+
# μΆμμΌμμ μ±μ κ΄κ³ λΆμ
|
| 115 |
+
if 'μΆμμΌμ' in filtered_data.columns and 'μ±μ ' in filtered_data.columns:
|
| 116 |
+
st.write("μΆμμΌμμ μ±μ κ΄κ³:")
|
| 117 |
+
fig = px.scatter(filtered_data, x='μΆμμΌμ', y='μ±μ ', color='λ°', hover_data=filtered_data.columns)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
|
| 119 |
# μ 체 λ°μ΄ν°μ λν νκ·μ μΆκ°
|
| 120 |
+
x = filtered_data['μΆμμΌμ']
|
| 121 |
+
y = filtered_data['μ±μ ']
|
| 122 |
slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
|
| 123 |
line_x = np.array([x.min(), x.max()])
|
| 124 |
line_y = slope * line_x + intercept
|
|
|
|
| 126 |
|
| 127 |
r_squared = r_value ** 2
|
| 128 |
fig.update_layout(
|
| 129 |
+
title=f'μΆμμΌμμ μ±μ κ΄κ³ (R-squared: {r_squared:.4f})',
|
| 130 |
annotations=[
|
| 131 |
dict(
|
| 132 |
x=0.5,
|
|
|
|
| 140 |
)
|
| 141 |
st.plotly_chart(fig)
|
| 142 |
|
| 143 |
+
# λ°λ³ μ±μ λΆν¬
|
| 144 |
+
if 'λ°' in filtered_data.columns and 'μ±μ ' in filtered_data.columns:
|
| 145 |
+
st.write("λ°λ³ μ±μ λΆν¬:")
|
| 146 |
+
fig = px.box(filtered_data, x='λ°', y='μ±μ ', points="all")
|
| 147 |
+
fig.update_layout(title='λ°λ³ μ±μ λΆν¬')
|
| 148 |
+
st.plotly_chart(fig)
|
| 149 |
+
|
| 150 |
+
# μΆμμΌμ ꡬκ°λ³ μ±μ λΆν¬
|
| 151 |
+
if 'μΆμμΌμ' in filtered_data.columns and 'μ±μ ' in filtered_data.columns:
|
| 152 |
+
st.write("μΆμμΌμ ꡬκ°λ³ μ±μ λΆν¬:")
|
| 153 |
+
filtered_data['μΆμμΌμ_ꡬκ°'] = pd.cut(filtered_data['μΆμμΌμ'], bins=5)
|
| 154 |
+
fig = px.box(filtered_data, x='μΆμμΌμ_ꡬκ°', y='μ±μ ', color='λ°')
|
| 155 |
+
fig.update_layout(title='μΆμμΌμ ꡬκ°λ³ μ±μ λΆν¬')
|
| 156 |
+
st.plotly_chart(fig)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
|
| 158 |
def main():
|
| 159 |
st.title("μΈν°λν°λΈ EDA ν΄ν·")
|