diff --git a/api_app/analyzers_manager/classes.py b/api_app/analyzers_manager/classes.py index c13257cd92..9a9d39e551 100644 --- a/api_app/analyzers_manager/classes.py +++ b/api_app/analyzers_manager/classes.py @@ -10,16 +10,18 @@ import requests from django.conf import settings +from django.core.files.base import ContentFile from certego_saas.apps.user.models import User from tests.mock_utils import MockUpResponse, if_mock_connections, patch from ..choices import Classification, PythonModuleBasePaths from ..classes import Plugin +from ..helpers import calculate_sha256 from ..models import PythonConfig from .constants import HashChoices, TypeChoices from .exceptions import AnalyzerConfigurationException, AnalyzerRunException -from .models import AnalyzerConfig, AnalyzerReport +from .models import AnalyzerConfig, AnalyzerReport, AnalyzerSourceFile logger = logging.getLogger(__name__) @@ -38,6 +40,58 @@ class BaseAnalyzerMixin(Plugin, metaclass=ABCMeta): SUSPICIOUS_EVALUATION = 35 FALSE_POSITIVE = -50 + @classmethod + def update_support_model(cls, file_name): + pass + + @classmethod + def update_source_file(cls, request_data: Dict, file_name) -> bool: + # check if file is updated + logger.info( + f"Source file update started with request data {request_data}, file name {file_name} and python module {cls.python_module}" + ) + update = False + response = requests.get(**request_data) + response.raise_for_status() + cfile = ContentFile(response.content, name=file_name) + sha_res = calculate_sha256(response.content) + source_file = AnalyzerSourceFile.objects.filter( + file_name=file_name, python_module=cls.python_module + ).first() + # check if source file exists + if source_file: + logger.info(f"Found source file {source_file}") + # check if source file needs to be updated + if source_file.sha256 != sha_res: + logger.info("About to update source file") + source_file.file.delete() + source_file.file = cfile + source_file.sha256 = sha_res + source_file.save() + update = True + else: + logger.info( + f"About to create new source file with file name {file_name} and python module {cls.python_module}" + ) + AnalyzerSourceFile.objects.create( + file_name=file_name, + python_module=cls.python_module, + file=cfile, + sha256=sha_res, + ) + update = True + + return update + + @classmethod + def update_internal_data(cls, request_data: Dict, file_name) -> bool: + update = cls.update_source_file(request_data, file_name) + + if update: + cls.update_support_model(file_name) + + return update + def threat_to_evaluation(self, threat_level): # MAGIC NUMBERS HERE!!! # I know, it should be 25-50-75-100. 
We raised it a bit because too many false positives were generated diff --git a/api_app/analyzers_manager/migrations/0152_torexitaddress_trancorecord_fireholrecord_and_more.py b/api_app/analyzers_manager/migrations/0152_torexitaddress_trancorecord_fireholrecord_and_more.py new file mode 100644 index 0000000000..eb8df62b86 --- /dev/null +++ b/api_app/analyzers_manager/migrations/0152_torexitaddress_trancorecord_fireholrecord_and_more.py @@ -0,0 +1,113 @@ +# Generated by Django 4.2.17 on 2025-02-21 15:25 + +import django.db.models.deletion +import django.utils.timezone +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("api_app", "0071_delete_last_elastic_report"), + ("analyzers_manager", "0151_analyzer_config_ipquery"), + ] + + operations = [ + migrations.CreateModel( + name="TorExitAddress", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("last_update", models.DateTimeField(auto_now=True)), + ("ip", models.GenericIPAddressField(unique=True)), + ], + options={ + "abstract": False, + }, + ), + migrations.CreateModel( + name="TrancoRecord", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("last_update", models.DateTimeField(auto_now=True)), + ("version", models.IntegerField(default=0)), + ("rank", models.IntegerField()), + ("domain", models.CharField(max_length=512)), + ( + "retrieved_date", + models.DateTimeField(default=django.utils.timezone.now), + ), + ], + options={ + "unique_together": {("rank", "domain", "retrieved_date")}, + }, + ), + migrations.CreateModel( + name="FireHolRecord", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("last_update", models.DateTimeField(auto_now=True)), + ("version", models.IntegerField(default=1)), + ("file_date", models.DateTimeField()), + ("source", models.CharField(max_length=300)), + ("ip_start", models.GenericIPAddressField()), + ("ip_end", models.GenericIPAddressField()), + ("category", models.CharField(max_length=300)), + ], + options={ + "unique_together": {("source", "ip_start", "ip_end", "category")}, + }, + ), + migrations.CreateModel( + name="AnalyzerSourceFile", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("file_name", models.CharField(max_length=512)), + ("file", models.FileField(upload_to="analyzers_source_files")), + ("sha256", models.CharField(max_length=64, unique=True)), + ("last_update", models.DateTimeField(auto_now=True)), + ( + "python_module", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="source_files", + to="api_app.pythonmodule", + ), + ), + ], + options={ + "unique_together": {("file_name", "python_module")}, + }, + ), + ] diff --git a/api_app/analyzers_manager/migrations/0153_remove_firehol_iplist_list_name_parameter.py b/api_app/analyzers_manager/migrations/0153_remove_firehol_iplist_list_name_parameter.py new file mode 100644 index 0000000000..0c13683b49 --- /dev/null +++ b/api_app/analyzers_manager/migrations/0153_remove_firehol_iplist_list_name_parameter.py @@ -0,0 +1,28 @@ +from django.db import migrations + + +def migrate(apps, schema_editor): + PythonModule = apps.get_model("api_app", "PythonModule") + Parameter = apps.get_model("api_app", "Parameter") + pm = 
PythonModule.objects.get( + module="firehol_iplist.FireHol_IPList", + base_path="api_app.analyzers_manager.observable_analyzers", + ) + Parameter.objects.get(name="list_names", python_module=pm).delete() + + +def reverse_migrate(apps, schema_editor): + pass + + +class Migration(migrations.Migration): + dependencies = [ + ("api_app", "0071_delete_last_elastic_report"), + ( + "analyzers_manager", + "0152_torexitaddress_trancorecord_fireholrecord_and_more", + ), + ] + operations = [ + migrations.RunPython(migrate, reverse_migrate), + ] diff --git a/api_app/analyzers_manager/migrations/0154_alter_update_schedule_firehol_iplist_tranco.py b/api_app/analyzers_manager/migrations/0154_alter_update_schedule_firehol_iplist_tranco.py new file mode 100644 index 0000000000..8cd1408b51 --- /dev/null +++ b/api_app/analyzers_manager/migrations/0154_alter_update_schedule_firehol_iplist_tranco.py @@ -0,0 +1,35 @@ +from django.db import migrations + + +def migrate(apps, schema_editor): + PythonModule = apps.get_model("api_app", "PythonModule") + CrontabSchedule = apps.get_model("django_celery_beat", "CrontabSchedule") + + cron_firehol = CrontabSchedule.objects.get_or_create(minute=10, hour=18)[0] + cron_tranco = CrontabSchedule.objects.get_or_create(minute=0, hour=1)[0] + pm_firehol = PythonModule.objects.get( + module="firehol_iplist.FireHol_IPList", + base_path="api_app.analyzers_manager.observable_analyzers", + ) + pm_tranco = PythonModule.objects.get( + module="tranco.Tranco", + base_path="api_app.analyzers_manager.observable_analyzers", + ) + pm_firehol.update_schedule = cron_firehol + pm_tranco.update_schedule = cron_tranco + pm_firehol.save() + pm_tranco.save() + + +def reverse_migrate(apps, schema_editor): + pass + + +class Migration(migrations.Migration): + dependencies = [ + ("api_app", "0071_delete_last_elastic_report"), + ("analyzers_manager", "0153_remove_firehol_iplist_list_name_parameter"), + ] + operations = [ + migrations.RunPython(migrate, reverse_migrate), + ] diff --git a/api_app/analyzers_manager/models/__init__.py b/api_app/analyzers_manager/models/__init__.py new file mode 100644 index 0000000000..2f35393df8 --- /dev/null +++ b/api_app/analyzers_manager/models/__init__.py @@ -0,0 +1,3 @@ +# flake8: noqa +from .models import * +from .support_models import * diff --git a/api_app/analyzers_manager/models.py b/api_app/analyzers_manager/models/models.py similarity index 95% rename from api_app/analyzers_manager/models.py rename to api_app/analyzers_manager/models/models.py index e1980ad4d8..cdbb4d8c7b 100644 --- a/api_app/analyzers_manager/models.py +++ b/api_app/analyzers_manager/models/models.py @@ -347,3 +347,19 @@ def plugin_type(cls) -> str: @property def config_exception(cls): return AnalyzerConfigurationException + + +class AnalyzerSourceFile(models.Model): + file_name = models.CharField(max_length=512) + python_module = models.ForeignKey( + PythonModule, related_name="source_files", on_delete=models.CASCADE + ) + file = models.FileField(upload_to="analyzers_source_files") + sha256 = models.CharField(unique=True, max_length=64) + last_update = models.DateTimeField(auto_now=True) + + class Meta: + unique_together = ("file_name", "python_module") + + def __str__(self): + return f"file_name: {self.file_name}, sha256:{self.sha256}" diff --git a/api_app/analyzers_manager/models/support_models.py b/api_app/analyzers_manager/models/support_models.py new file mode 100644 index 0000000000..7cd58c5c50 --- /dev/null +++ b/api_app/analyzers_manager/models/support_models.py @@ -0,0 +1,76 @@ +from 
logging import getLogger + +from django.db import models +from django.utils.timezone import now + +logger = getLogger(__name__) + + +class SupportModel(models.Model): + last_update = models.DateTimeField(auto_now=True) + + class Meta: + abstract = True + + @classmethod + def generate(cls, data): + records = [] + for i, record in enumerate(data): + records.append(cls(**record)) + if i % 10000 == 0 and i != 0 and records: + cls.objects.bulk_create(records, ignore_conflicts=True) + records = [] + if records: + cls.objects.bulk_create(records, ignore_conflicts=True) + + @classmethod + def reset(cls): + cls.objects.all().delete() + + +class FireHolRecord(SupportModel): + version = models.IntegerField(default=1) + file_date = models.DateTimeField() + source = models.CharField(max_length=300) + ip_start = models.GenericIPAddressField() + ip_end = models.GenericIPAddressField() + category = models.CharField(max_length=300) + + class Meta: + unique_together = ("source", "ip_start", "ip_end", "category") + + @classmethod + def generate(cls, data): + records = [] + for i, record in enumerate(data): + logger.debug(f"Record is: {record}") + records.append(cls(**record)) + if i % 10000 == 0 and i != 0 and records: + cls.objects.bulk_create( + records, + update_conflicts=True, + update_fields=["file_date"], + unique_fields=["source", "ip_start", "ip_end", "category"], + ) + records = [] + if records: + cls.objects.bulk_create( + records, + update_conflicts=True, + update_fields=["file_date"], + unique_fields=["source", "ip_start", "ip_end", "category"], + ) + + +class TorExitAddress(SupportModel): + ip = models.GenericIPAddressField(unique=True) + + +class TrancoRecord(SupportModel): + version = models.IntegerField(default=0) + rank = models.IntegerField() + domain = models.CharField(max_length=512) + retrieved_date = models.DateTimeField(default=now) + + class Meta: + unique_together = ("rank", "domain", "retrieved_date") diff --git a/api_app/analyzers_manager/observable_analyzers/firehol_iplist.py b/api_app/analyzers_manager/observable_analyzers/firehol_iplist.py index e0879a6ff0..276fe7c8f7 100644 --- a/api_app/analyzers_manager/observable_analyzers/firehol_iplist.py +++ b/api_app/analyzers_manager/observable_analyzers/firehol_iplist.py @@ -1,124 +1,424 @@ -# This file is a part of IntelOwl https://github.com/intelowlproject/IntelOwl -# See the file 'LICENSE' for copying permission. 
- import ipaddress import logging -import os -import traceback -from datetime import datetime +import re -import requests -from django.conf import settings +import dateparser -from api_app.analyzers_manager import classes -from api_app.analyzers_manager.exceptions import ( - AnalyzerConfigurationException, - AnalyzerRunException, -) -from tests.mock_utils import MockUpResponse, if_mock_connections, patch +from api_app.analyzers_manager.classes import ObservableAnalyzer +from api_app.analyzers_manager.models import AnalyzerSourceFile, FireHolRecord logger = logging.getLogger(__name__) -db_path = f"{settings.MEDIA_ROOT}" - - -class FireHol_IPList(classes.ObservableAnalyzer): - list_names: list - - def run(self): - ip = self.observable_name - result = {} - - if not self.list_names: - raise AnalyzerConfigurationException( - "list_names is empty in custom analyzer config, add an iplist" - ) - - for list_name in self.list_names: - result[list_name] = False - self.check_iplist_status(list_name) +class FireHol_IPList(ObservableAnalyzer): + regex_netstat = r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/\d{1,2}" + regex_ip = r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}" + base_url = ( + "https://raw.githubusercontent.com/firehol/blocklist-ipsets/refs/heads/master" + ) + file_names = [ + "alienvault_reputation.ipset", + "asprox_c2.ipset", + "bambenek_banjori.ipset", + "bambenek_bebloh.ipset", + "bambenek_c2.ipset", + "bambenek_cl.ipset", + "bambenek_cryptowall.ipset", + "bambenek_dircrypt.ipset", + "bambenek_dyre.ipset", + "bambenek_geodo.ipset", + "bambenek_hesperbot.ipset", + "bambenek_matsnu.ipset", + "bambenek_necurs.ipset", + "bambenek_p2pgoz.ipset", + "bambenek_pushdo.ipset", + "bambenek_pykspa.ipset", + "bambenek_qakbot.ipset", + "bambenek_ramnit.ipset", + "bambenek_ranbyus.ipset", + "bambenek_simda.ipset", + "bambenek_suppobox.ipset", + "bambenek_symmi.ipset", + "bambenek_tinba.ipset", + "bambenek_volatile.ipset", + "bbcan177_ms1.netset", + "bbcan177_ms3.netset", + "bds_atif.ipset", + "bitcoin_blockchain_info_1d.ipset", + "bitcoin_blockchain_info_7d.ipset", + "bitcoin_nodes.ipset", + "bitcoin_nodes_1d.ipset", + "bitcoin_nodes_7d.ipset", + "blocklist_de_apache.ipset", + "blocklist_de_bots.ipset", + "blocklist_de_bruteforce.ipset", + "blocklist_de_ftp.ipset", + "blocklist_de_imap.ipset", + "blocklist_de_mail.ipset", + "blocklist_de_sip.ipset", + "blocklist_de_ssh.ipset", + "blocklist_de_strongips.ipset", + "botscout.ipset", + "botscout_1d.ipset", + "botscout_7d.ipset", + "botvrij_dst.ipset", + "botvrij_src.ipset", + "bruteforceblocker.ipset", + "ciarmy.ipset", + "cidr_report_bogons.netset", + "cleanmx_phishing.ipset", + "cleanmx_viruses.ipset", + "cleantalk.ipset", + "cleantalk_1d.ipset", + "cleantalk_new.ipset", + "cleantalk_new_1d.ipset", + "cleantalk_new_7d.ipset", + "cleantalk_top20.ipset", + "coinbl_hosts.ipset", + "coinbl_hosts_browser.ipset", + "coinbl_hosts_optional.ipset", + "coinbl_ips.ipset", + "cruzit_web_attacks.ipset", + "cta_cryptowall.ipset", + "cybercrime.ipset", + "darklist_de.netset", + "datacenters.netset", + "dm_tor.ipset", + "dshield.netset", + "dshield_1d.netset", + "dshield_7d.netset", + "dshield_top_1000.ipset", + "dyndns_ponmocup.ipset", + "esentire_14072015_com.ipset", + "esentire_14072015q_com.ipset", + "esentire_22072014a_com.ipset", + "esentire_22072014b_com.ipset", + "esentire_22072014c_com.ipset", + "esentire_atomictrivia_ru.ipset", + "esentire_auth_update_ru.ipset", + "esentire_burmundisoul_ru.ipset", + "esentire_crazyerror_su.ipset", + "esentire_dagestanskiiviskis_ru.ipset", + 
"esentire_differentia_ru.ipset", + "esentire_disorderstatus_ru.ipset", + "esentire_dorttlokolrt_com.ipset", + "esentire_downs1_ru.ipset", + "esentire_ebankoalalusys_ru.ipset", + "esentire_emptyarray_ru.ipset", + "esentire_fioartd_com.ipset", + "esentire_getarohirodrons_com.ipset", + "esentire_hasanhashsde_ru.ipset", + "esentire_inleet_ru.ipset", + "esentire_islamislamdi_ru.ipset", + "esentire_krnqlwlplttc_com.ipset", + "esentire_maddox1_ru.ipset", + "esentire_manning1_ru.ipset", + "esentire_misteryherson_ru.ipset", + "esentire_mysebstarion_ru.ipset", + "esentire_smartfoodsglutenfree_kz.ipset", + "esentire_venerologvasan93_ru.ipset", + "esentire_volaya_ru.ipset", + "et_block.netset", + "et_botcc.ipset", + "et_compromised.ipset", + "et_dshield.netset", + "et_spamhaus.netset", + "et_tor.ipset", + "feodo.ipset", + "feodo_badips.ipset", + "firehol_abusers_1d.netset", + "firehol_level1.netset", + "firehol_level2.netset", + "firehol_level3.netset", + "firehol_level4.netset", + "firehol_webclient.netset", + "firehol_webserver.netset", + "gpf_comics.ipset", + "graphiclineweb.netset", + "greensnow.ipset", + "haley_ssh.ipset", + "hphosts_ats.ipset", + "hphosts_emd.ipset", + "hphosts_exp.ipset", + "hphosts_fsa.ipset", + "hphosts_grm.ipset", + "hphosts_hfs.ipset", + "hphosts_hjk.ipset", + "hphosts_mmt.ipset", + "hphosts_pha.ipset", + "hphosts_psh.ipset", + "hphosts_wrz.ipset", + "iblocklist_abuse_palevo.netset", + "iblocklist_abuse_spyeye.netset", + "iblocklist_abuse_zeus.netset", + "iblocklist_ciarmy_malicious.netset", + "iblocklist_cidr_report_bogons.netset", + "iblocklist_cruzit_web_attacks.netset", + "iblocklist_isp_aol.netset", + "iblocklist_isp_att.netset", + "iblocklist_isp_cablevision.netset", + "iblocklist_isp_charter.netset", + "iblocklist_isp_comcast.netset", + "iblocklist_isp_embarq.netset", + "iblocklist_isp_qwest.netset", + "iblocklist_isp_sprint.netset", + "iblocklist_isp_suddenlink.netset", + "iblocklist_isp_twc.netset", + "iblocklist_isp_verizon.netset", + "iblocklist_malc0de.netset", + "iblocklist_onion_router.netset", + "iblocklist_org_activision.netset", + "iblocklist_org_apple.netset", + "iblocklist_org_blizzard.netset", + "iblocklist_org_crowd_control.netset", + "iblocklist_org_electronic_arts.netset", + "iblocklist_org_joost.netset", + "iblocklist_org_linden_lab.netset", + "iblocklist_org_logmein.netset", + "iblocklist_org_ncsoft.netset", + "iblocklist_org_nintendo.netset", + "iblocklist_org_pandora.netset", + "iblocklist_org_pirate_bay.netset", + "iblocklist_org_punkbuster.netset", + "iblocklist_org_riot_games.netset", + "iblocklist_org_sony_online.netset", + "iblocklist_org_square_enix.netset", + "iblocklist_org_steam.netset", + "iblocklist_org_ubisoft.netset", + "iblocklist_org_xfire.netset", + "iblocklist_pedophiles.netset", + "iblocklist_spamhaus_drop.netset", + "iblocklist_yoyo_adservers.netset", + "ipblacklistcloud_recent.ipset", + "ipblacklistcloud_recent_1d.ipset", + "ipblacklistcloud_recent_7d.ipset", + "ipblacklistcloud_top.ipset", + "iw_spamlist.ipset", + "iw_wormlist.ipset", + "lashback_ubl.ipset", + "malc0de.ipset", + "malwaredomainlist.ipset", + "maxmind_proxy_fraud.ipset", + "myip.ipset", + "nixspam.ipset", + "normshield_all_attack.ipset", + "normshield_all_bruteforce.ipset", + "normshield_all_ddosbot.ipset", + "normshield_all_dnsscan.ipset", + "normshield_all_spam.ipset", + "normshield_all_suspicious.ipset", + "normshield_all_wannacry.ipset", + "normshield_all_webscan.ipset", + "normshield_all_wormscan.ipset", + "normshield_high_attack.ipset", + 
"normshield_high_bruteforce.ipset", + "normshield_high_ddosbot.ipset", + "normshield_high_dnsscan.ipset", + "normshield_high_spam.ipset", + "normshield_high_suspicious.ipset", + "normshield_high_wannacry.ipset", + "normshield_high_webscan.ipset", + "normshield_high_wormscan.ipset", + "nt_malware_dns.ipset", + "nt_malware_http.ipset", + "nt_malware_irc.ipset", + "nt_ssh_7d.ipset", + "nullsecure.ipset", + "packetmail.ipset", + "packetmail_emerging_ips.ipset", + "packetmail_mail.ipset", + "packetmail_ramnode.ipset", + "php_commenters.ipset", + "php_commenters_1d.ipset", + "php_commenters_7d.ipset", + "php_dictionary.ipset", + "php_dictionary_1d.ipset", + "php_dictionary_7d.ipset", + "php_harvesters.ipset", + "php_harvesters_1d.ipset", + "php_harvesters_7d.ipset", + "php_spammers.ipset", + "php_spammers_1d.ipset", + "php_spammers_7d.ipset", + "proxylists.ipset", + "proxylists_1d.ipset", + "proxylists_7d.ipset", + "proxyspy_1d.ipset", + "proxyspy_7d.ipset", + "proxz.ipset", + "proxz_1d.ipset", + "proxz_7d.ipset", + "pushing_inertia_blocklist.netset", + "ransomware_cryptowall_ps.ipset", + "ransomware_feed.ipset", + "ransomware_locky_c2.ipset", + "ransomware_locky_ps.ipset", + "ransomware_online.ipset", + "ransomware_rw.ipset", + "ransomware_teslacrypt_ps.ipset", + "ransomware_torrentlocker_c2.ipset", + "ransomware_torrentlocker_ps.ipset", + "sblam.ipset", + "set_file_timestamps.sh", + "snort_ipfilter.ipset", + "socks_proxy.ipset", + "socks_proxy_1d.ipset", + "socks_proxy_7d.ipset", + "spamhaus_drop.netset", + "spamhaus_edrop.netset", + "sslbl.ipset", + "sslbl_aggressive.ipset", + "sslproxies.ipset", + "sslproxies_1d.ipset", + "sslproxies_7d.ipset", + "stopforumspam_1d.ipset", + "taichung.ipset", + "talosintel_ipfilter.ipset", + "threatcrowd.ipset", + "tor_exits.ipset", + "tor_exits_1d.ipset", + "tor_exits_7d.ipset", + "turris_greylist.ipset", + "urandomusto_dns.ipset", + "urandomusto_ftp.ipset", + "urandomusto_http.ipset", + "urandomusto_mailer.ipset", + "urandomusto_malware.ipset", + "urandomusto_ntp.ipset", + "urandomusto_rdp.ipset", + "urandomusto_smb.ipset", + "urandomusto_spam.ipset", + "urandomusto_ssh.ipset", + "urandomusto_telnet.ipset", + "urandomusto_unspecified.ipset", + "urandomusto_vnc.ipset", + "urlvir.ipset", + "uscert_hidden_cobra.ipset", + "voipbl.netset", + "vxvault.ipset", + "xforce_bccs.ipset", + "xroxy.ipset", + "xroxy_1d.ipset", + "xroxy_7d.ipset", + "yoyo_adservers.ipset", + ] - with open(f"{db_path}/{list_name}", "r", encoding="utf-8") as f: - db = f.read() + def run(self) -> dict: + result = {"found": False} - db_list = db.split("\n") + records = FireHolRecord.objects.filter( + ip_start__lte=self.observable_name, ip_end__gte=self.observable_name + ).values() - for ip_or_subnet in db_list: - if ip_or_subnet and ipaddress.ip_address(ip) in ipaddress.ip_network( - ip_or_subnet - ): - result[list_name] = True - break + categories = {} + for rec in records: + try: + categories[rec["category"]].append( + { + "source": rec["source"], + "file_date": rec["file_date"].strftime("%Y-%m-%d %H:%M:%S"), + "last_update": rec["last_update"].strftime("%Y-%m-%d %H:%M:%S"), + "ip_start": rec["ip_start"], + "ip_end": rec["ip_end"], + } + ) + except KeyError: + categories[rec["category"]] = [ + { + "source": rec["source"], + "file_date": rec["file_date"].strftime("%Y-%m-%d %H:%M:%S"), + "last_update": rec["last_update"].strftime("%Y-%m-%d %H:%M:%S"), + "ip_start": rec["ip_start"], + "ip_end": rec["ip_end"], + } + ] + if categories: + result["found"] = True + result["categories"] = categories 
return result - @staticmethod - def download_iplist(list_name): - if ".ipset" not in list_name and ".netset" not in list_name: - raise AnalyzerConfigurationException( - f"extension missing from {list_name} (add .ipset or .netset to name)" + @classmethod + def update(cls) -> bool: + general_update = False + for file_name in cls.file_names: + request_data = {"url": f"{cls.base_url}/{file_name}"} + update = cls.update_internal_data( + request_data, + file_name, ) + if update: + general_update = True + return general_update - try: - iplist_location = f"{db_path}/{list_name}" - data_cleaned = "" - - logger.info(f"starting download of {list_name} from firehol iplist") - url = f"https://iplists.firehol.org/files/{list_name}" - r = requests.get(url) - r.raise_for_status() - - data_extracted = r.content.decode() - - for line in data_extracted.splitlines(): - if not line.startswith("#"): - data_cleaned += f"{line}\n" - - with open(iplist_location, "w", encoding="utf-8") as f: - f.write(data_cleaned) + @classmethod + def update_support_model(cls, file_name): + source_file = AnalyzerSourceFile.objects.filter( + file_name=file_name, python_module=cls.python_module + ).first() - if not os.path.exists(iplist_location): - raise AnalyzerRunException(f"failed extraction of {list_name} iplist") + if ".ipset" in file_name: + source = file_name.replace(".ipset", "") + netstat = False + elif ".netset" in file_name: + source = file_name.replace(".netset", "") + netstat = True + else: + return - logger.info(f"ended download of {list_name} from firehol iplist") + comments, ips = [], [] + for line in source_file.file.readlines(): + line = line.decode() + comments.append(line) if line.startswith("#") else ips.append(line) - except Exception as e: - traceback.print_exc() - logger.exception(e) + category = None + file_date = None + for line in comments: + if "# Source File Date: " in line: + datestr = re.sub("# Source File Date: ", "", line.rstrip("\n")) + parsed_date = dateparser.parse(datestr) + file_date = parsed_date + match_category = re.search(r"Category\s+:\s(\w+)", line.rstrip("\n")) + if match_category: + category = match_category.group(1) + if category: + break - def check_iplist_status(self, list_name): - iplist_location = f"{db_path}/{list_name}" + records = {} + # iterating over IP addresses extracted from the file + for i, line in enumerate(ips): - if not os.path.exists(iplist_location): - self.download_iplist(list_name) + # calculate IP range + if netstat: + ip_address_found = re.search( + cls.regex_netstat, + line, + ) + if not ip_address_found: + logger.info( + f"Can't find ip address, line is {line} in file {file_name}" + ) + continue - now = datetime.now() - timestamp = os.path.getctime(iplist_location) - dt_object = datetime.fromtimestamp(timestamp) - time_diff = now - dt_object + ip_address_found = ipaddress.IPv4Network(ip_address_found.group()) + ip_start = str(ip_address_found.network_address) + ip_end = str(ip_address_found.broadcast_address) + else: + ip_address_found = re.search(cls.regex_ip, line) + if not ip_address_found: + logger.info( + f"Can't find ip address, line is {line} in file {file_name}" + ) + continue + ip_start = ip_end = ip_address_found.group() - if time_diff.days < 1: - logger.info("iplist is up to date") - else: - os.remove(iplist_location) - self.download_iplist(list_name) + data = { + "source": source, + "category": category, + "file_date": file_date, + "ip_start": ip_start, + "ip_end": ip_end, + } + records[(source, ip_start, ip_end, category)] = data - @classmethod - 
def _monkeypatch(cls): - patches = [ - if_mock_connections( - patch( - "requests.get", - return_value=MockUpResponse( - json_data={}, - status_code=200, - text="""0.0.0.0/8\n - 1.10.16.0/20\n - 1.19.0.0/16\n - 3.90.198.217\n""", - ), - ), - ) - ] - return super()._monkeypatch(patches=patches) + FireHolRecord.generate(list(records.values())) diff --git a/api_app/analyzers_manager/observable_analyzers/maxmind.py b/api_app/analyzers_manager/observable_analyzers/maxmind.py index 5938e7bfd4..2127caf8b8 100644 --- a/api_app/analyzers_manager/observable_analyzers/maxmind.py +++ b/api_app/analyzers_manager/observable_analyzers/maxmind.py @@ -1,84 +1,82 @@ # This file is a part of IntelOwl https://github.com/intelowlproject/IntelOwl # See the file 'LICENSE' for copying permission. -import datetime import logging -import os -import shutil import tarfile +import tempfile +from typing import Dict +from unittest.mock import patch import maxminddb import requests -from django.conf import settings +from django.core.files import File +from django.utils import timezone from geoip2.database import Reader from geoip2.errors import AddressNotFoundError, GeoIP2Error from geoip2.models import ASN, City, Country from api_app.analyzers_manager import classes -from api_app.analyzers_manager.exceptions import ( - AnalyzerConfigurationException, - AnalyzerRunException, -) +from api_app.analyzers_manager.exceptions import AnalyzerRunException +from api_app.analyzers_manager.models import AnalyzerSourceFile +from api_app.helpers import calculate_sha256 from api_app.models import PluginConfig -from tests.mock_utils import if_mock_connections, patch +from tests.mock_utils import if_mock_connections logger = logging.getLogger(__name__) -class MaxmindDBManager: +class Maxmind(classes.ObservableAnalyzer): + _api_key_name: str _supported_dbs: [str] = ["GeoLite2-Country", "GeoLite2-City", "GeoLite2-ASN"] _default_db_extension: str = ".mmdb" - @classmethod - def get_supported_dbs(cls) -> [str]: - return [db_name + cls._default_db_extension for db_name in cls._supported_dbs] - - @classmethod - def update_all_dbs(cls, api_key: str) -> bool: - return all(cls._update_db(db, api_key) for db in cls._supported_dbs) - - def query_all_dbs(self, observable_query: str, api_key: str) -> (dict, dict): + def run(self): maxmind_final_result: {} = {} maxmind_errors: [] = [] - for db in self._supported_dbs: - maxmind_result, maxmind_error = self._query_single_db( - observable_query, db, api_key - ) + source_files = AnalyzerSourceFile.objects.filter( + python_module=self.python_module + ) + if not source_files: + raise AnalyzerRunException("No source file found") + + for source_file in source_files: + maxmind_result, maxmind_error = self._query_single_db(source_file) if maxmind_error: maxmind_errors.append(maxmind_error["error"]) elif maxmind_result: - logger.info(f"maxmind result: {maxmind_result} in {db=}") + logger.info( + f"maxmind result: {maxmind_result} in {source_file.file_name}" + ) maxmind_final_result.update(maxmind_result) else: - logger.warning(f"maxmind result not available in {db=}") - - return maxmind_final_result, maxmind_errors + logger.warning( + f"maxmind result not available in {source_file.file_name}" + ) - @classmethod - def _get_physical_location(cls, db: str) -> str: - return f"{settings.MEDIA_ROOT}/{db}{cls._default_db_extension}" + if maxmind_errors: + for error_msg in maxmind_errors: + self.report.errors.append(error_msg) + self.report.save() + return maxmind_final_result - def _query_single_db( - self, query_ip: 
str, db_name: str, api_key: str - ) -> (dict, dict): + def _query_single_db(self, source_file) -> (dict, dict): result: ASN | City | Country - db_path: str = self._get_physical_location(db_name) - self._check_and_update_db(api_key, db_name) - logger.info(f"Query {db_name=} for {query_ip=}") - with Reader(db_path) as reader: + logger.info(f"Query {source_file.file_name} for {self.observable_name}") + + with Reader(source_file.file, mode=maxminddb.MODE_FD) as reader: try: - if "ASN" in db_name: - result = reader.asn(query_ip) - elif "Country" in db_name: - result = reader.country(query_ip) - elif "City" in db_name: - result = reader.city(query_ip) + if "ASN" in source_file.file_name: + result = reader.asn(self.observable_name) + elif "Country" in source_file.file_name: + result = reader.country(self.observable_name) + elif "City" in source_file.file_name: + result = reader.city(self.observable_name) except AddressNotFoundError: reader.close() logger.info( - f"Query for observable '{query_ip}' " + f"Query for observable '{self.observable_name}' " "didn't produce any results in any db." ) return {}, {} @@ -90,127 +88,16 @@ def _query_single_db( reader.close() return result.raw, {} - def _check_and_update_db(self, api_key: str, db_name: str): - db_path = self._get_physical_location(db_name) - if not os.path.isfile(db_path) and not self._update_db(db_name, api_key): - raise AnalyzerRunException( - f"failed extraction of maxmind db {db_name}," - " reached max number of attempts" - ) - if not os.path.exists(db_path): - raise maxminddb.InvalidDatabaseError( - f"database location '{db_path}' does not exist" - ) - - @classmethod - def _update_db(cls, db: str, api_key: str) -> bool: - if not api_key: - raise AnalyzerConfigurationException( - f"Unable to find api key for {cls.__name__}" - ) - - try: - logger.info(f"starting download of {db=} from maxmind") - - tar_db_path = cls._download_db(db, api_key) - cls._extract_db_to_media_root(tar_db_path) - directory_found = cls._remove_old_db(db) - - if not directory_found: - return False - - logger.info(f"ended download of {db=} from maxmind") - return True - - except Exception as e: - logger.exception(e) - return False - - @classmethod - def _download_db(cls, db_name: str, api_key: str) -> str: - url = ( - "https://download.maxmind.com/app/geoip_download?edition_id=" - f"{db_name}&license_key={api_key}&suffix=tar.gz" - ) - response = requests.get(url) - if response.status_code >= 300: - raise AnalyzerRunException( - f"failed request for new maxmind db {db_name}." 
- f" Status code: {response.status_code}" - f"\nResponse: {response.raw}" - ) - - return cls._write_db_to_filesystem(db_name, response.content) - - @classmethod - def _write_db_to_filesystem(cls, db_name: str, content: bytes) -> str: - tar_db_path = f"/tmp/{db_name}.tar.gz" - logger.info( - f"starting writing db {db_name} downloaded from maxmind to {tar_db_path}" - ) - with open(tar_db_path, "wb") as f: - f.write(content) - - return tar_db_path - - @classmethod - def _extract_db_to_media_root(cls, tar_db_path: str): - logger.info(f"Started extracting {tar_db_path} to {settings.MEDIA_ROOT}.") - tf = tarfile.open(tar_db_path) - tf.extractall(str(settings.MEDIA_ROOT)) - logger.info(f"Finished extracting {tar_db_path} to {settings.MEDIA_ROOT}.") - - @classmethod - def _remove_old_db(cls, db: str) -> bool: - physical_db_location = cls._get_physical_location(db) - today = datetime.datetime.now().date() - counter = 0 - directory_found = False - # this is because we do not know the exact date of the db we downloaded - while counter < 10 or not directory_found: - formatted_date = (today - datetime.timedelta(days=counter)).strftime( - "%Y%m%d" - ) - downloaded_db_path = ( - f"{settings.MEDIA_ROOT}/" - f"{db}_{formatted_date}/{db}{cls._default_db_extension}" - ) - try: - os.rename(downloaded_db_path, physical_db_location) - except FileNotFoundError: - logger.debug(f"{downloaded_db_path} not found move to the day before") - counter += 1 - else: - directory_found = True - shutil.rmtree(f"{settings.MEDIA_ROOT}/" f"{db}_{formatted_date}") - logger.info(f"maxmind directory found {downloaded_db_path}") - return directory_found - - -class Maxmind(classes.ObservableAnalyzer): - _api_key_name: str - _maxmind_db_manager: "MaxmindDBManager" = MaxmindDBManager() - - def run(self): - maxmind_final_result, maxmind_errors = self._maxmind_db_manager.query_all_dbs( - self.observable_name, self._api_key_name - ) - if maxmind_errors: - for error_msg in maxmind_errors: - self.report.errors.append(error_msg) - self.report.save() - return maxmind_final_result - @classmethod def get_db_names(cls) -> [str]: - return cls._maxmind_db_manager.get_supported_dbs() + return [db_name + cls._default_db_extension for db_name in cls._supported_dbs] @classmethod def _get_api_key(cls): for plugin in PluginConfig.objects.filter( parameter__python_module=cls.python_module, parameter__is_secret=True, - parameter__name="_api_key_name", + parameter__name="api_key_name", ): if plugin.value: return plugin.value @@ -219,9 +106,92 @@ def _get_api_key(cls): @classmethod def update(cls) -> bool: auth_token = cls._get_api_key() + general_update = False if auth_token: - return cls._maxmind_db_manager.update_all_dbs(cls._api_key_name) - return False + for db_name in cls._supported_dbs: + request_data = { + "url": ( + "https://download.maxmind.com/app/geoip_download?edition_id=" + f"{db_name}&license_key={auth_token}&suffix=tar.gz" + ) + } + file_name = f"{db_name}{cls._default_db_extension}" + update = cls.update_source_file( + request_data, + file_name, + ) + if update: + general_update = True + else: + logger.error("Missing api key") + return general_update + + @classmethod + def update_source_file(cls, request_data: Dict, file_name) -> bool: + # check if file is updated + logger.info( + f"Source file update started with request data {request_data}, file name {file_name} and python module {cls.python_module}" + ) + update = False + response = requests.get(**request_data) + response.raise_for_status() + # extract maxmind db file + db_name = 
file_name.replace(cls._default_db_extension, "") + + with tempfile.TemporaryDirectory() as tempdirname: + tar_db_path = f"{tempdirname}/{db_name}.tar.gz" + with open(tar_db_path, "wb") as f: + f.write(response.content) + tf = tarfile.open(tar_db_path) + tf.extractall(tempdirname) + + for counter in range(10): + formatted_date = ( + timezone.now().date() - timezone.timedelta(days=counter) + ).strftime("%Y%m%d") + + try: + file_path = f"{tempdirname}/{db_name}_{formatted_date}/{db_name}{cls._default_db_extension}" + with open( + file_path, + "rb", + ) as f: + logger.info(f"Found file {file_path}") + mmdb_file = File(f, name=file_name) + + sha_res = calculate_sha256(mmdb_file.file.read()) + source_file = AnalyzerSourceFile.objects.filter( + file_name=file_name, python_module=cls.python_module + ).first() + # check if source file exists + if source_file: + logger.info(f"Found source file {source_file}") + # check if source file needs to be updated + if source_file.sha256 != sha_res: + logger.info("About to update source file") + source_file.file.delete() + source_file.file = mmdb_file + source_file.sha256 = sha_res + source_file.save() + update = True + else: + logger.info( + f"About to create new source file with file name {file_name} and python module {cls.python_module}" + ) + AnalyzerSourceFile.objects.create( + file_name=file_name, + python_module=cls.python_module, + file=mmdb_file, + sha256=sha_res, + ) + update = True + + break + except FileNotFoundError: + logger.info(f"{file_path} not found") + continue + + return update @classmethod def _monkeypatch(cls): diff --git a/api_app/analyzers_manager/observable_analyzers/tor.py b/api_app/analyzers_manager/observable_analyzers/tor.py index 9fbe7a8957..44993b185d 100644 --- a/api_app/analyzers_manager/observable_analyzers/tor.py +++ b/api_app/analyzers_manager/observable_analyzers/tor.py @@ -1,16 +1,12 @@ # This file is a part of IntelOwl https://github.com/intelowlproject/IntelOwl # See the file 'LICENSE' for copying permission. 
- import logging -import os import re -import requests from django.conf import settings from api_app.analyzers_manager import classes -from api_app.analyzers_manager.exceptions import AnalyzerRunException -from tests.mock_utils import MockUpResponse, if_mock_connections, patch +from api_app.analyzers_manager.models import AnalyzerSourceFile, TorExitAddress logger = logging.getLogger(__name__) @@ -19,69 +15,50 @@ class Tor(classes.ObservableAnalyzer): + file_name = "tor_exit_addresses.txt" + url = "https://check.torproject.org/exit-addresses" + regex_ip = r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}" + def _do_create_data_model(self) -> bool: return super()._do_create_data_model() and self.report.report["found"] def run(self): result = {"found": False} - if not os.path.isfile(database_location) and not self.update(): - raise AnalyzerRunException("Failed extraction of tor db") - - if not os.path.exists(database_location): - raise AnalyzerRunException( - f"database location {database_location} does not exist" - ) - with open(database_location, "r", encoding="utf-8") as f: - db = f.read() - - db_list = db.split("\n") - if self.observable_name in db_list: + tor_exit_address = TorExitAddress.objects.filter( + ip=self.observable_name + ).exists() + if tor_exit_address: result["found"] = True return result @classmethod def update(cls): - try: - logger.info("starting download of db from tor project") - url = "https://check.torproject.org/exit-addresses" - r = requests.get(url) - r.raise_for_status() - - data_extracted = r.content.decode() - findings = re.findall(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}", data_extracted) + request_data = {"url": cls.url} + return cls.update_internal_data( + request_data, + cls.file_name, + ) - with open(database_location, "w", encoding="utf-8") as f: - for ip in findings: - if ip: - f.write(f"{ip}\n") + @classmethod + def update_support_model(cls, file_name): + source_file = AnalyzerSourceFile.objects.filter( + file_name=file_name, python_module=cls.python_module + ).first() - if not os.path.exists(database_location): - return False + records = [] + for line in source_file.file.readlines(): + line = line.decode() + ip_address_found = re.search(cls.regex_ip, line) + if ip_address_found: + ip_address_found = ip_address_found.group() - logger.info("ended download of db from tor project") - return True - except Exception as e: - logger.exception(e) + records.append({"ip": ip_address_found}) - return False + TorExitAddress.generate(records) @classmethod def _monkeypatch(cls): - patches = [ - if_mock_connections( - patch( - "requests.get", - return_value=MockUpResponse( - {}, - 200, - content=b"""ExitNode D2A4BEE6754A9711EB0FAC47F3059BE6FC0D72C7 -Published 2022-08-17 18:11:11 -LastStatus 2022-08-18 14:00:00 -ExitAddress 93.95.230.253 2022-08-18 14:44:33""", - ), - ), - ) - ] + patches = [] return super()._monkeypatch(patches=patches) diff --git a/api_app/analyzers_manager/observable_analyzers/tranco.py b/api_app/analyzers_manager/observable_analyzers/tranco.py index 25ea510e28..9d44651369 100644 --- a/api_app/analyzers_manager/observable_analyzers/tranco.py +++ b/api_app/analyzers_manager/observable_analyzers/tranco.py @@ -1,41 +1,77 @@ # This file is a part of IntelOwl https://github.com/intelowlproject/IntelOwl # See the file 'LICENSE' for copying permission. 
- +import logging +from io import BytesIO from urllib.parse import urlparse - -import requests +from zipfile import ZipFile from api_app.analyzers_manager import classes -from api_app.choices import Classification -from tests.mock_utils import MockUpResponse, if_mock_connections, patch +from api_app.analyzers_manager.models import AnalyzerSourceFile, TrancoRecord + +logger = logging.getLogger(__name__) class Tranco(classes.ObservableAnalyzer): - url: str = "https://tranco-list.eu/api/ranks/domain/" + url: str = "https://tranco-list.s3.amazonaws.com/top-1m.csv.zip" @classmethod def update(cls) -> bool: - pass + request_data = { + "url": cls.url, + } + return cls.update_internal_data( + request_data, + "tranco_ranks.zip", + ) + + @classmethod + def update_support_model(cls, file_name): + source_file = AnalyzerSourceFile.objects.filter( + file_name=file_name, python_module=cls.python_module + ).first() + + records = [] + with ZipFile(BytesIO(source_file.file.read())) as thezip: + with thezip.open("top-1m.csv") as f: + for i, line in enumerate(f.readlines()): + rank, domain = line.decode().strip().split(",") + records.append( + { + "rank": rank, + "domain": domain, + } + ) + TrancoRecord.generate(records) def run(self): - observable_to_analyze = self.observable_name - if self.observable_classification == Classification.URL: - observable_to_analyze = urlparse(self.observable_name).hostname + result = {"found": False} + + domain_extracted = urlparse(self.observable_name).hostname + if domain_extracted: + domain_to_evaluate = domain_extracted + else: + domain_to_evaluate = self.observable_name + + if domain_to_evaluate.startswith("www."): + domain_to_evaluate = domain_to_evaluate[4:] + + records = ( + TrancoRecord.objects.filter(domain=domain_to_evaluate) + .order_by("-retrieved_date") + .values() + ) + + for rec in records: + rec["last_update"] = rec["last_update"].strftime("%Y-%m-%d %H:%M:%S") + rec["retrieved_date"] = rec["retrieved_date"].strftime("%Y-%m-%d %H:%M:%S") - url = self.url + observable_to_analyze - response = requests.get(url) - response.raise_for_status() + if records: + result["found"] = True + result["ranks"] = list(records) - return response.json() + return result @classmethod def _monkeypatch(cls): - patches = [ - if_mock_connections( - patch( - "requests.get", - return_value=MockUpResponse({}, 200), - ), - ) - ] + patches = [] return super()._monkeypatch(patches=patches)
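
Note (illustrative, not part of the diff): the new BaseAnalyzerMixin.update_internal_data / update_source_file / update_support_model chain is easiest to follow end to end on the Tor analyzer. A minimal sketch of how the pieces are wired, assuming a Django shell or worker context with migrations 0152-0154 applied and network access to check.torproject.org:

    # Minimal sketch of the new update flow; illustrative only, not code from this PR.
    from api_app.analyzers_manager.models import AnalyzerSourceFile, TorExitAddress
    from api_app.analyzers_manager.observable_analyzers.tor import Tor

    # Tor.update() builds the requests.get kwargs and delegates to
    # update_internal_data(), which downloads the exit-address list, stores it
    # as an AnalyzerSourceFile keyed by (file_name, python_module), and returns
    # True only when the sha256 of the downloaded content changed.
    changed = Tor.update()

    if changed:
        # On a real change, update_support_model() re-populated the
        # TorExitAddress support table via SupportModel.generate().
        src = AnalyzerSourceFile.objects.get(
            file_name=Tor.file_name, python_module=Tor.python_module
        )
        print(src.sha256, TorExitAddress.objects.count())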
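
Note (illustrative, not part of the diff): FireHol_IPList.run() no longer reads list files from MEDIA_ROOT; it resolves the observable through a range-containment query on the FireHolRecord support table. A small sketch of that lookup, assuming FireHol_IPList.update() has already populated the table (the IP below is hypothetical):

    # Containment query the analyzer relies on; illustrative only.
    from api_app.analyzers_manager.models import FireHolRecord

    ip = "1.10.16.1"  # hypothetical observable
    # GenericIPAddressField is stored as inet on a PostgreSQL backend, so
    # ip_start <= ip <= ip_end is an address comparison, not a string one.
    hits = FireHolRecord.objects.filter(ip_start__lte=ip, ip_end__gte=ip)
    for rec in hits:
        print(rec.category, rec.source, rec.ip_start, rec.ip_end, rec.file_date)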