1+ import streamlit as st
2+ import pandas as pd
3+ import numpy as np
4+ import plotly .express as px
5+ import matplotlib .pyplot as plt
6+ import seaborn as sns
7+ from statsmodels .tsa .arima .model import ARIMA
8+ import datetime
9+ import plotly .graph_objects as go
10+ import networkx as nx
11+
12+
# Load and preprocess data; the result is cached by Streamlit across reruns.
@st.cache_data
def load_data():
    """Read the monthly question counts and index them by month.

    The data is read from ``TotalQuestions.csv`` in the working directory.
    The ``Month`` column is parsed as dates and becomes the index; every
    other column is returned as read from the file.

    The function takes no arguments, so no ``hash_funcs`` are needed for
    the cache key.

    Returns:
        pandas.DataFrame: the CSV contents indexed by ``Month``.
    """
    df = pd.read_csv('TotalQuestions.csv', parse_dates=['Month'])
    return df.set_index('Month')
19+
20+
# Sidebar navigation: the selected entry decides which page is rendered.
menu = st.sidebar.selectbox(
    'Navigation',
    [
        'Stack Overflow Question Forecast',
        'Graphical Analysis',
        'Timeline Visualization',
    ],
)

if menu == 'Stack Overflow Question Forecast':
    # Load the data; every column name is offered as a selectable language.
    df = load_data()
    languages = list(df.columns)
29+
def forecast_questions(df, language, future_month, future_year):
    """Forecast the value of one column for a single future month.

    Args:
        df: DataFrame whose index holds dates (the last index entry is
            taken as the end of the observed data) and which has a
            column named ``language``.
        language: Name of the column to forecast.
        future_month: Target month number (1-12).
        future_year: Target year.

    Returns:
        The last value of an ARIMA(5, 1, 0) forecast that runs from the
        month after the last observation up to the target month.

    Raises:
        ValueError: If the target month is not strictly after the last
            month present in ``df``.
    """
    last_date = df.index[-1]
    future_date = pd.to_datetime(f'{future_year}-{future_month:02d}-01')
    months_ahead = (
        (future_date.year - last_date.year) * 12
        + future_date.month
        - last_date.month
    )

    # Validate first: fitting the model is the expensive step, so an
    # invalid request should fail before any fitting happens.
    if months_ahead <= 0:
        raise ValueError("Prediction must have end after start.")

    # Simple ARIMA model for demonstration.
    model_fit = ARIMA(df[language], order=(5, 1, 0)).fit()
    forecast = model_fit.forecast(steps=months_ahead)
    return forecast.iloc[-1]  # value for the requested month
40+
41+
def generate_forecasts(df, language, start_date, periods):
    """Forecast ``periods`` steps of one column and label them with dates.

    Args:
        df: DataFrame containing a column named ``language``.
        language: Name of the column to forecast.
        start_date: Date from which the month-end labels are generated.
        periods: Number of steps to forecast.

    Returns:
        pandas.DataFrame: one column named ``language`` holding the
        ARIMA(5, 1, 0) forecast, indexed by ``periods`` consecutive
        month-end dates starting at ``start_date``.
    """
    model_fit = ARIMA(df[language], order=(5, 1, 0)).fit()
    forecast = model_fit.forecast(steps=periods)

    # An offset object yields the same month-end dates as the 'M' alias
    # without depending on which alias spelling the pandas version accepts.
    future_dates = pd.date_range(
        start=start_date, periods=periods, freq=pd.offsets.MonthEnd()
    )

    # Use the raw values: handing a Series to the DataFrame constructor
    # together with a different ``index`` aligns by label, which would
    # leave only NaN wherever the forecast's own index differs.
    return pd.DataFrame({language: np.asarray(forecast)}, index=future_dates)
49+
50+
# --- Forecast page: title and inputs ---------------------------------------
st.markdown(
    "<h1 style='color: #87CEEB; font-size: 36px;'>Stack Overflow Question Forecast</h1>",
    unsafe_allow_html=True,
)
st.markdown("---", unsafe_allow_html=True)

# The subheaders are the visible captions. Each widget still gets a real,
# collapsed label so no widget is created with an empty label.
st.subheader('Select Programming Language')
selected_language = st.selectbox(
    'Programming language', languages, label_visibility='collapsed'
)

current_year = datetime.datetime.now().year
col1, col2 = st.columns(2)
with col1:
    st.subheader('Select Future Month')
    future_month = st.selectbox(
        'Future month',
        list(range(1, 13)),
        format_func=lambda m: datetime.date(1900, m, 1).strftime('%B'),
        label_visibility='collapsed',
    )
with col2:
    st.subheader('Select Future Year')
    future_year = st.selectbox(
        'Future year',
        list(range(current_year, current_year + 6)),
        label_visibility='collapsed',
    )

# --- Forecast page: prediction and charts ----------------------------------
if st.button('Predict'):
    try:
        # Headline figure; raises ValueError when the chosen month is not
        # after the last month in the data.
        prediction = forecast_questions(df, selected_language, future_month, future_year)
        month_name = datetime.date(1900, future_month, 1).strftime('%B')
        st.markdown(
            "<div style='background-color: green; color: white; padding: 10px; border-radius: 5px;'>"
            f"<strong>Predicted number of questions for {selected_language} in {month_name} {future_year}: "
            f"<span style='color: red;'>{int(prediction)}</span></strong></div>",
            unsafe_allow_html=True,
        )

        # Fit the model once and forecast far enough to cover every chart,
        # instead of refitting it for each individual month. The k-th
        # element of one long forecast equals the last element of a
        # k-step forecast, so the plotted values are unchanged.
        history = df[selected_language]
        last_period = df.index[-1].to_period('M')
        chart_years = list(range(current_year, future_year + 5))
        horizon = (chart_years[-1] - last_period.year) * 12 + (12 - last_period.month)
        forecast = ARIMA(history, order=(5, 1, 0)).fit().forecast(steps=horizon)

        # One monthly series: observed values where the data has them,
        # forecast values afterwards. Months already present in the data
        # therefore no longer abort the charts with a ValueError, and a
        # partially observed year is completed with forecasts.
        monthly = pd.concat([
            pd.Series(history.to_numpy(), index=df.index.to_period('M')),
            pd.Series(
                np.asarray(forecast),
                index=pd.period_range(start=last_period + 1, periods=horizon, freq='M'),
            ),
        ])

        # Plot 1: bar chart of the monthly values for the selected year
        selected_year_values = monthly[monthly.index.year == future_year]
        month_forecast_df = pd.DataFrame(
            {selected_language: selected_year_values.to_numpy()},
            index=selected_year_values.index.to_timestamp(),
        )
        fig1 = px.bar(
            month_forecast_df,
            x=month_forecast_df.index.strftime('%B'),
            y=selected_language,
            title=f'Monthly Predictions for {future_year}',
        )
        st.plotly_chart(fig1)

        # Plot 2: yearly totals from the current year through four years
        # past the selected year
        yearly_totals = [
            monthly[monthly.index.year == year].sum() for year in chart_years
        ]
        year_forecast_df = pd.DataFrame(
            {selected_language: yearly_totals}, index=chart_years
        )
        fig2 = px.bar(
            year_forecast_df,
            x=year_forecast_df.index,
            y=selected_language,
            title=f'Yearly Predictions for Next 5 Years for {selected_language}',
        )
        st.plotly_chart(fig2)

        # Plot 3: pie chart of each month's share of the selected year
        year_forecast_percent = month_forecast_df / month_forecast_df.sum() * 100
        fig3 = px.pie(
            year_forecast_percent,
            values=selected_language,
            names=year_forecast_percent.index.strftime('%B'),
            title=f'Percentage Question Distribution for {selected_language} in {future_year}',
        )
        st.plotly_chart(fig3)

        # Plot 4: line plot of the monthly trend for the selected year
        fig4 = px.line(
            month_forecast_df,
            x=month_forecast_df.index,
            y=selected_language,
            title=f'Monthly Trends for {selected_language}',
        )
        fig4.update_traces(mode='lines+markers')
        fig4.update_layout(
            xaxis_title='Date',
            yaxis_title='Number of Questions',
            plot_bgcolor='rgba(0, 0, 0, 0)',
        )
        st.plotly_chart(fig4)

    except ValueError as e:
        st.error(f"Error: {e}")
122+
# `menu` holds exactly one page name, so this standalone test is equivalent
# to chaining it onto the previous page's condition.
if menu == 'Graphical Analysis':

    # Page title
    st.markdown(
        "<h1 style='color: #87CEEB; font-size: 36px;'>Graphical Analysis</h1>",
        unsafe_allow_html=True,
    )
    st.markdown("---", unsafe_allow_html=True)

    # Load data
    df = load_data()

    # 1) Annual line chart. A YearEnd offset object is used instead of a
    #    frequency alias so the call does not depend on the alias spelling
    #    accepted by the installed pandas version.
    df_annual = df.resample(pd.offsets.YearEnd()).sum()
    fig1 = px.line(df_annual, x=df_annual.index, y=df_annual.columns,
                   title='Timeline of the number of questions per category (2008-2024)')
    st.plotly_chart(fig1)

    # 2) Change in question counts between consecutive rows
    df_change = df.diff()
    fig2 = px.line(df_change, x=df_change.index, y=df_change.columns,
                   title='Change in Question Counts for Each Programming Language Over Time')
    st.plotly_chart(fig2)

    # 3) Total number of questions per language, largest first.
    #    The ranking is computed once and reused for the top-5/top-10 views.
    total_questions_by_language = df.sum().sort_values(ascending=False)
    fig4 = px.bar(x=total_questions_by_language.index, y=total_questions_by_language.values,
                  title='Total Number of Questions by Programming Languages')
    st.plotly_chart(fig4)

    # 4) Individual time series for the five largest languages
    top_5_languages = total_questions_by_language.head(5).index.tolist()
    df_top_5 = df[top_5_languages]
    fig5 = px.line(df_top_5, x=df_top_5.index, y=df_top_5.columns,
                   title='Individual Temporal Series for Top 5 Languages')
    st.plotly_chart(fig5)

    # 5) Totals grouped by the weekday of each index date. Reindexing to
    #    0..6 keeps the values aligned with the seven weekday names even
    #    when some weekday never occurs in the index.
    daily_total_questions = (
        df.groupby(df.index.dayofweek).sum().sum(axis=1).reindex(range(7), fill_value=0)
    )
    fig6 = px.bar(
        x=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'],
        y=daily_total_questions.values,
        title='Total Number of Questions by Day of the Week',
    )
    st.plotly_chart(fig6)

    # 6) Heatmap of the correlation between all languages
    correlation_matrix = df.corr()
    fig7 = px.imshow(correlation_matrix, color_continuous_scale='thermal',
                     title='Correlation Heatmap of Programming Languages')
    st.plotly_chart(fig7)

    # 7) Box plot of the ten largest languages
    top_10_languages = total_questions_by_language.head(10).index.tolist()
    df_top_10 = df[top_10_languages]
    fig8 = px.box(df_top_10, y=df_top_10.columns,
                  title='Distribution of Questions for Top 10 Programming Languages')
    st.plotly_chart(fig8)

    # 8) Network plot built from the top-10 correlation matrix.
    #    Nodes are numbered by matrix position, hence the position -> name
    #    mapping used for the labels.
    corr_matrix = df_top_10.corr()
    G = nx.from_numpy_array(corr_matrix.values)

    # Apply the dark style only while this figure is created and rendered,
    # so it takes effect for the figure without changing global settings.
    with plt.style.context('dark_background'):
        network_fig, network_ax = plt.subplots(figsize=(12, 8))
        pos = nx.spring_layout(G, seed=42)  # fixed seed: same layout on every rerun

        nx.draw_networkx_nodes(G, pos, ax=network_ax, node_size=1500,
                               node_color='skyblue', edgecolors='grey')
        nx.draw_networkx_edges(G, pos, ax=network_ax, edge_color='grey')
        nx.draw_networkx_labels(G, pos, ax=network_ax,
                                labels=dict(enumerate(top_10_languages)),
                                font_size=10, font_weight='bold')
        network_ax.set_title('Network Plot of Top 10 Programming Languages based on Correlation')

        # Hand Streamlit the figure object itself rather than the pyplot module.
        st.pyplot(network_fig)

    # Release the figure so reruns do not accumulate open figures.
    plt.close(network_fig)
218+
219+
# `menu` holds exactly one page name, so a single standalone test replaces
# the original branch and its nested, identical condition.
if menu == 'Timeline Visualization':
    st.markdown(
        "<h1 style='color: #87CEEB; font-size: 36px;'>Timeline Visualization</h1>",
        unsafe_allow_html=True,
    )
    st.markdown("---", unsafe_allow_html=True)

    # JavaScript to attempt autoplay.
    # NOTE(review): nothing in this page creates an element with the id
    # 'autoplay-video', and script tags injected through st.markdown may
    # not be executed - confirm whether this snippet has any effect.
    autoplay_js = (
        "\n"
        "<script>\n"
        "document.addEventListener('DOMContentLoaded', function(event) {\n"
        "var video = document.getElementById('autoplay-video');\n"
        "video.play();\n"
        "});\n"
        "</script>\n"
    )
    st.markdown(autoplay_js, unsafe_allow_html=True)

    # Display the MP4 video
    video_path = 'stack_overflow.mp4'  # Replace with your actual video file path
    try:
        # The context manager closes the file handle once the bytes are read.
        with open(video_path, 'rb') as video_file:
            video_bytes = video_file.read()
    except FileNotFoundError:
        # Show a message on the page instead of crashing the app.
        st.error(f"Video file not found: {video_path}")
    else:
        st.video(video_bytes, start_time=0)
0 commit comments