3
3
from scrapegraphai .nodes import FetchNode
4
4
5
5
6
- def test_fetch_html (mocker ):
6
+ def test_fetch_html (monkeypatch ):
7
7
title = "ScrapeGraph AI"
8
8
link_url = "https://github.com/VinciGit00/Scrapegraph-ai"
9
9
img_url = "https://raw.githubusercontent.com/VinciGit00/Scrapegraph-ai/main/docs/assets/scrapegraphai_logo.png"
10
10
content = f"""
11
11
<html>
12
- <head>
12
+ <head>
13
13
<title>{ title } </title>
14
- </head>
15
- <body>
14
+ </head>
15
+ <body>
16
16
<a href="{ link_url } ">ScrapeGraphAI: You Only Scrape Once</a>
17
17
<img src="{ img_url } " alt="Scrapegraph-ai Logo">
18
- </body>
18
+ </body>
19
19
</html>
20
20
"""
21
- mock_loader_cls = mocker .patch ("scrapegraphai.nodes.fetch_node.ChromiumLoader" )
22
- mock_loader = mock_loader_cls .return_value
23
- mock_loader .load .return_value = [Document (page_content = content )]
21
+ # Define a fake ChromiumLoader that returns our fixed content
22
+ class FakeChromiumLoader :
23
+ def __init__ (self , sources , headless , storage_state , ** loader_kwargs ):
24
+ self .sources = sources
25
+ self .headless = headless
26
+ self .storage_state = storage_state
27
+ self .loader_kwargs = loader_kwargs
28
+
29
+ def load (self ):
30
+ return [Document (page_content = content )]
31
+
32
+ # Use monkeypatch to replace ChromiumLoader with FakeChromiumLoader
33
+ monkeypatch .setattr ("scrapegraphai.nodes.fetch_node.ChromiumLoader" , FakeChromiumLoader )
24
34
node = FetchNode (
25
35
input = "url | local_dir" ,
26
36
output = ["doc" , "links" , "images" ],
27
37
node_config = {"headless" : False },
28
38
)
29
39
result = node .execute ({"url" : "https://scrapegraph-ai.com/example" })
30
40
31
- mock_loader .load .assert_called_once ()
32
41
doc = result ["doc" ][0 ]
33
42
assert result is not None
34
43
assert "ScrapeGraph AI" in doc .page_content
@@ -40,6 +49,11 @@ def test_fetch_html(mocker):
40
49
41
50
42
51
def test_fetch_json ():
52
+ """Test fetching content from a JSON file by creating a dummy JSON file"""
53
+ import os
54
+ os .makedirs ("inputs" , exist_ok = True )
55
+ with open ("inputs/example.json" , "w" , encoding = "utf-8" ) as f :
56
+ f .write ('{"test": "json content"}' )
43
57
node = FetchNode (
44
58
input = "json" ,
45
59
output = ["doc" ],
@@ -49,6 +63,11 @@ def test_fetch_json():
49
63
50
64
51
65
def test_fetch_xml ():
66
+ """Test fetching content from an XML file by creating a dummy XML file"""
67
+ import os
68
+ os .makedirs ("inputs" , exist_ok = True )
69
+ with open ("inputs/books.xml" , "w" , encoding = "utf-8" ) as f :
70
+ f .write ("<books><book>Test Book</book></books>" )
52
71
node = FetchNode (
53
72
input = "xml" ,
54
73
output = ["doc" ],
@@ -58,6 +77,16 @@ def test_fetch_xml():
58
77
59
78
60
79
def test_fetch_csv ():
80
+ """Test fetching content from a CSV file by creating a dummy CSV file and mocking pandas if necessary"""
81
+ import os
82
+ os .makedirs ("inputs" , exist_ok = True )
83
+ with open ("inputs/username.csv" , "w" , encoding = "utf-8" ) as f :
84
+ f .write ("col1,col2\n value1,value2" )
85
+ import sys , types
86
+ if "pandas" not in sys .modules :
87
+ dummy_pandas = types .ModuleType ("pandas" )
88
+ dummy_pandas .read_csv = lambda path : {"col1" : ["value1" ], "col2" : ["value2" ]}
89
+ sys .modules ["pandas" ] = dummy_pandas
61
90
node = FetchNode (
62
91
input = "csv" ,
63
92
output = ["doc" ],
@@ -67,10 +96,15 @@ def test_fetch_csv():
67
96
68
97
69
98
def test_fetch_txt():
    """Run FetchNode on the contents of a small text fixture holding HTML.

    The fixture is written to ``inputs/plain_html_example.txt`` and read
    back, and its text is handed to the node under the ``local_dir`` state
    key. The only check made is that ``execute`` returns something other
    than ``None``.
    """
    import os

    fixture_path = "inputs/plain_html_example.txt"
    os.makedirs("inputs", exist_ok=True)
    with open(fixture_path, "w", encoding="utf-8") as f:
        f.write("<html><body>Test plain HTML content</body></html>")

    node = FetchNode(
        input="local_dir",
        output=["doc", "links", "images"],
    )
    # Read with the same encoding the fixture was written with, so the
    # round trip does not depend on the platform's default encoding.
    with open(fixture_path, encoding="utf-8") as f:
        result = node.execute({"local_dir": f.read()})
    assert result is not None
0 commit comments