Web Application Using Python
Matplotlib is one of the most popular data visualization libraries in Python, widely used in both research and industry to visualize data. Because Matplotlib generates static images, we often use Plotly alongside it during development to create interactive web dashboards.
For efficiency, we split the graph code into multiple functions. Some of these functions were already introduced and explained in the previous section.
We implemented a cluster plot to show how the users are clustered based on the features collected from the VRs. Since the performance of clustering depends heavily on the algorithm used, we employed two different algorithms: K-Means and DBSCAN. We implemented scatter plots in which each point represents a user, with the x-axis showing the independent variable and the y-axis showing the dependent variable used for clustering.
def _closed_hull_path(points):
    """Return the convex-hull outline of *points* as a closed (xs, ys) path.

    Returns ``None`` when no 2-D hull can be built: fewer than three points,
    or points that are all identical or collinear (Qhull rejects those).
    """
    if len(points) < 3:
        return None

    # Imported locally so this block does not rely on a new file-level import.
    from scipy.spatial import QhullError

    try:
        hull = ConvexHull(points)
    except QhullError:
        # Degenerate cluster (e.g. every user has the same age): skip the
        # outline instead of failing the whole figure.
        return None

    # hull.vertices lists each corner once; repeat the first corner so the
    # drawn line returns to its start instead of leaving the last edge open.
    ring = np.append(hull.vertices, hull.vertices[0])
    return points[ring, 0], points[ring, 1]


def generate_cluster_plot(data, clustering_algorithm, new_point=None):
    """Cluster users on age and sentiment and return a Plotly scatter figure.

    Args:
        data: DataFrame with "age", "sentiment" and "username" columns.
            A "cluster" column holding the predicted label of every row is
            written to it in place.
        clustering_algorithm: "KMeans" selects KMeans with two clusters; any
            other value selects DBSCAN (eps=0.5, min_samples=5).
        new_point: Optional (age, sentiment) pair drawn as a red "x" marker.
            ``None`` or an empty sequence draws nothing.

    Returns:
        A ``go.Figure`` with one marker trace per cluster label, a shaded
        convex-hull outline for every cluster that spans an area, and the
        optional new-point marker.
    """
    features = data[["age", "sentiment"]]

    # A constant column has zero spread (and a single row has NaN spread);
    # dividing by it would feed NaN to the clusterer. Use 1 there so the
    # column is only centred.
    spread = features.std()
    spread = spread.where(spread > 0, 1.0)
    features_normalized = (features - features.mean()) / spread

    if clustering_algorithm == "KMeans":
        model = KMeans(n_clusters=2, random_state=0)
    else:
        model = DBSCAN(eps=0.5, min_samples=5)
    # NOTE: deliberately kept as an in-place write so callers that read
    # data["cluster"] afterwards keep working.
    data["cluster"] = model.fit_predict(features_normalized)

    scatter_plots = []
    for cluster_id in data["cluster"].unique():
        cluster_data = data[data["cluster"] == cluster_id]

        # NOTE(review): cluster_id is a numeric label, not a colour string;
        # confirm the rendered per-cluster colours are the intended ones.
        scatter_plots.append(
            go.Scatter(
                x=cluster_data["age"],
                y=cluster_data["sentiment"],
                mode="markers",
                marker=dict(color=cluster_id, size=10),
                text=cluster_data["username"],
                name=f"Cluster {cluster_id}",
                hoverinfo="text",
            )
        )

        # The outline is drawn in the original (un-normalised) units so it
        # lines up with the markers above.
        points = np.column_stack(
            (cluster_data["age"], cluster_data["sentiment"])
        )
        outline = _closed_hull_path(points)
        if outline is not None:
            outline_x, outline_y = outline
            scatter_plots.append(
                go.Scatter(
                    x=outline_x,
                    y=outline_y,
                    mode="lines",
                    line=dict(color="black", width=2),
                    fill="toself",
                    fillcolor="rgba(0,0,0,0.1)",
                    showlegend=False,
                )
            )

    # Explicit None/length check: truth-testing a NumPy array raises
    # ValueError, while an empty sequence must still mean "no point".
    if new_point is not None and len(new_point) > 0:
        scatter_plots.append(
            go.Scatter(
                x=[new_point[0]],
                y=[new_point[1]],
                mode="markers",
                marker=dict(color="red", size=12, symbol="x"),
                name="New Point",
            )
        )

    layout = go.Layout(
        title="Scatter plot with Clustering",
        xaxis=dict(title="Age"),
        yaxis=dict(title="Sentiment"),
    )
    return go.Figure(data=scatter_plots, layout=layout)