Skip to content

Commit 163c173

Browse files
authored
Merge pull request larymak#214 from Sneha2319/contribute
Added fake news detection project
2 parents eed9d02 + 8bcddfc commit 163c173

File tree

14 files changed

+167100
-0
lines changed

14 files changed

+167100
-0
lines changed
10 KB
Binary file not shown.

MachineLearning Projects/Fakenews/Fakenews.ipynb

Lines changed: 483 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Introduction
2+
This is a machine learning project that detects fake news from the URL of a news article.
3+
4+
# Project Structure
5+
This project has four major parts :
6+
7+
* fake_news_detection.py - This contains the code for our Machine Learning model that classifies news as fake or real.
8+
* app.py - This contains the Flask APIs that receive a news URL through the GUI or API calls, extract the article from the URL, feed it to the model and return the prediction.
9+
* templates - This folder contains the HTML template that allows the user to enter a URL and displays whether the news is fake or real.
10+
* static - This folder contains the CSS file.
11+
12+
# Running the project on local machine
13+
14+
Ensure that you are in the project home directory.
15+
16+
Run app.py using the command below to start the Flask API
17+
python app.py
18+
By default, flask will run on port 5000.
19+
20+
Navigate to URL http://127.0.0.1:5000
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
import numpy as np
2+
from flask import Flask, request,render_template
3+
from flask_cors import CORS
4+
import joblib
5+
import pickle
6+
import flask
7+
import os
8+
import newspaper
9+
from newspaper import Article
10+
import urllib.request
11+
import nltk
12+
nltk.download('punkt')
13+
14+
# Create the Flask application exactly once and enable CORS on that same
# instance. CORS has to wrap the object the routes are registered on; wrapping
# one instance and then rebinding `app` to a fresh Flask object would leave
# the served application without CORS.
app = flask.Flask(__name__, template_folder='templates')
CORS(app)

# Load the serialised model used by the /predict route.
# NOTE(review): unpickling can execute arbitrary code, so model.pickle must
# only ever come from a trusted source (e.g. the project's own training
# script), never from user-supplied data.
with open('model.pickle', 'rb') as handle:
    model = pickle.load(handle)
21+
22+
@app.route('/')
def main():
    """Render the index.html template for the root URL."""
    landing_page = render_template('index.html')
    return landing_page
25+
26+
#Receiving the input url from the user and using web scraping to extract the news content
27+
@app.route('/predict', methods=['GET', 'POST'])
def predict():
    """Classify the news article behind a submitted URL and render the result.

    The raw request body is read as text, its first five characters are
    dropped and the remainder is percent-decoded to obtain the article URL.
    The article is downloaded, parsed and summarised with newspaper, and the
    summary is passed to the loaded model.

    Returns:
        The rendered ``index.html`` page with ``prediction_text`` set either
        to the model's prediction or to a short error message when no URL was
        supplied or the article could not be retrieved.
    """
    # NOTE(review): the [5:] slice presumably strips a five-character
    # "<field>=" prefix of the submitted form body -- confirm against
    # templates/index.html before changing it.
    url = urllib.parse.unquote(request.get_data(as_text=True)[5:]).strip()

    # A GET request (or an empty submission) carries no URL, so there is
    # nothing to download; answer with a message instead of failing later.
    if not url:
        return render_template('index.html',
                               prediction_text='Please enter a news URL.')

    article = Article(url)
    try:
        article.download()
        article.parse()
        article.nlp()
    except newspaper.ArticleException:
        # Unreachable or unparsable pages should not surface as a server error.
        return render_template(
            'index.html',
            prediction_text='Could not retrieve an article from the given URL.')

    # Passing the article summary to the model and returning whether it is Fake or Real
    pred = model.predict([article.summary])
    return render_template('index.html',
                           prediction_text='The news is "{}"'.format(pred[0]))
39+
40+
if __name__ == "__main__":
    # Start Flask's built-in server on the port given by the PORT environment
    # variable, falling back to 5000 when it is not set.
    # NOTE(review): debug=True is hard-coded; confirm this entry point is only
    # used for local development.
    app.run(
        port=int(os.environ.get('PORT', 5000)),
        debug=True,
        use_reloader=False,
    )
43+
44+
45+
46+
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
#Importing the libraries
2+
import pandas as pd
3+
import numpy as np
4+
from sklearn.metrics import classification_report, confusion_matrix
5+
from sklearn.pipeline import Pipeline
6+
from sklearn.model_selection import train_test_split
7+
from sklearn.naive_bayes import MultinomialNB
8+
from sklearn.feature_extraction.text import TfidfVectorizer
9+
import pickle
10+
11+
# Load the cleaned dataset; the 'text' column is the model input and the
# 'label' column is the target.
news = pd.read_csv('news.csv')
X = news['text']
y = news['label']

# Hold out 20% of the rows for evaluation. A fixed random_state keeps the
# split -- and therefore the reported metrics and the pickled model --
# reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Pipeline: TF-IDF features (English stop words removed) followed by a
# Multinomial Naive Bayes classifier.
pipeline = Pipeline([
    ('tfidf', TfidfVectorizer(stop_words='english')),
    ('nbmodel', MultinomialNB()),
])

# Train on the training split.
pipeline.fit(X_train, y_train)

# Predict labels for the held-out test split.
pred = pipeline.predict(X_test)

# Report the performance of the model on the test split.
print(classification_report(y_test, pred))
print(confusion_matrix(y_test, pred))

# Serialise the whole fitted pipeline (vectoriser + classifier) so it can be
# loaded later and fed raw text directly.
with open('model.pickle', 'wb') as handle:
    pickle.dump(pipeline, handle, protocol=pickle.HIGHEST_PROTOCOL)
39+
40+
Loading
Loading
4.58 MB
Binary file not shown.

MachineLearning Projects/Fakenews/news.csv

Lines changed: 166355 additions & 0 deletions
Large diffs are not rendered by default.
Binary file not shown.

0 commit comments

Comments
 (0)