Merge pull request 'feat/add-data' (#3) from feat/add-data into main
Reviewed-on: #3
data/base_data.json    | 25719 lines, new file (diff suppressed: too large)
data/customers.json    | 45094 lines, new file (diff suppressed: too large)
data/female_gnames.txt |  1003 lines, new file (diff suppressed: too large)
data/male_gnames.txt   |  1003 lines, new file (diff suppressed: too large)
data/names.txt         |  3421 lines, new file (diff suppressed: too large)
data/street_names.txt  |  1157 lines, new file (diff suppressed: too large)
data/zip_codes.txt     |  8168 lines, new file (diff suppressed: too large)
data_preparation/config.py | 7 lines, new file
@@ -0,0 +1,7 @@
"""Global configuration for data preprocessing."""

from pathlib import Path

BASE_DIR = Path(__file__).resolve().parent.parent
DATA_DIR = BASE_DIR / "data"
APP_DIR = BASE_DIR / "app"
data_preparation/data_utils.py | 34 lines, new file
@@ -0,0 +1,34 @@
import json
from datetime import datetime
from typing import Any


class DateTimeEncoder(json.JSONEncoder):
    """A custom JSON encoder that handles datetime objects.

    This encoder extends the json.JSONEncoder class to provide serialization
    support for datetime objects, converting them to ISO format strings.

    Methods
    -------
    default(obj: Any) -> str
        Encode datetime objects as ISO format strings.
    """

    def default(self, obj: Any) -> str:
        """Encode the given object as a JSON-serializable type.

        Parameters
        ----------
        obj : Any
            The object to encode.

        Returns
        -------
        str
            The ISO format string if the object is a datetime, otherwise
            delegates to the superclass encoder.
        """
        if isinstance(obj, datetime):
            return obj.isoformat()
        return super().default(obj)
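A minimal usage sketch for the encoder, assuming data_utils.py is on the import path (as it is for the scripts in this directory):

import json
from datetime import datetime

from data_utils import DateTimeEncoder

# datetime values are serialized as ISO 8601 strings instead of raising a TypeError
payload = {"meter": "3.123.456.7", "read_at": datetime(2024, 8, 30, 12, 0)}
print(json.dumps(payload, cls=DateTimeEncoder))
# -> {"meter": "3.123.456.7", "read_at": "2024-08-30T12:00:00"}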
data_preparation/generate_customers.py | 222 lines, new file
@@ -0,0 +1,222 @@
"""Module to randomly generate a JSON file with customer data for the Avacon app."""

import json
import math
import random
from datetime import datetime, timedelta
from typing import Dict, List, Tuple

import pandas as pd
from config import DATA_DIR
from data_utils import DateTimeEncoder


def generate_readings(num_customers: int) -> List[Dict[str, Tuple[str, List[datetime], List[int]]]]:
    """Generate simulated meter readings for a specified number of customers.

    This function creates synthetic data for both natural gas and electricity
    meter readings. It simulates readings based on average consumption patterns
    in Germany, applying random variations to model real-world scenarios.

    Parameters
    ----------
    num_customers : int
        The number of customers for which to generate meter readings.

    Returns
    -------
    List[Dict[str, Tuple[str, List[datetime], List[int]]]]
        A list of dictionaries, where each dictionary represents a customer and
        contains:
        - 'electricity': A tuple of (meter number, list of reading dates, list of readings)
        - 'gas': A tuple of (meter number, list of reading dates, list of readings)
    """
    # NOTE: Of course, natural gas and electricity consumption depend on various factors, such as
    # the size of the household, the age of the building, insulation, etc. For the sake of this
    # example, we simply take the known average value and sample from a Gaussian distribution
    # around this value, with a standard deviation of 10% of the mean.
    # For the mean we assumed the average size of a flat in Germany (90 m²) and the average
    # consumption of 140 kWh/m², which we need to convert to m³ for the meter reading. To do this,
    # we use the calorific value of natural gas, assumed to be around 11.215 kWh/m³. We also need
    # to account for slight pressure differences using the conversion factor ("Zustandszahl").
    mean_natural_gas = 12600  # in kWh
    calorific_value = 11.215  # in kWh/m³
    conversion_factor = 0.9692

    mean_cubic = mean_natural_gas / (calorific_value * conversion_factor)
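    # With these defaults: 12600 / (11.215 * 0.9692) ≈ 1159 m³ of gas per year on average.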

    # For electricity, we take the average consumption of a 2-person household in Germany.
    mean_electricity = 3500  # in kWh

    readings = []
    # For each customer, generate between 1 and 10 readings (we assume that natural gas and
    # electricity are always read at the same time)
    for _ in range(num_customers):
        # The initial reading of the customer's meter
        gas_reading = random.randint(1000, 60000)
        elt_reading = random.randint(1000, 600_000)

        # Create an Avacon-style meter number
        gas_meter_number = generate_meter_number()
        elt_meter_number = generate_meter_number()

        num_readings = random.randint(1, 10)

        # Get initial timestamp: Assuming that each reading takes place once a year around a
        # similar date, we just take today's date and subtract a number of years corresponding
        # to the number of readings
        init_date = generate_past_date_with_variance(num_readings)
        tmp_gas_dates: list[datetime] = []
        tmp_elt_dates: list[datetime] = []
        tmp_gas_readings = []
        tmp_elt_readings = []
        for j in range(num_readings):
            time_diff = 0
            if j > 0:
                time_diff = 365 + random.randint(-50, 50)

            gas_date = tmp_gas_dates[-1] + timedelta(days=time_diff) if j > 0 else init_date

            # Electricity is read around a similar date as natural gas
            elt_date = gas_date + timedelta(days=random.randint(-10, 10))

            # Generate random readings
            gas_reading += int(random.gauss(mean_cubic, mean_cubic * 0.1))
            elt_reading += int(random.gauss(mean_electricity, mean_electricity * 0.1))

            # Append to temporary lists
            tmp_gas_dates.append(gas_date)
            tmp_elt_dates.append(elt_date)
            tmp_gas_readings.append(gas_reading)
            tmp_elt_readings.append(elt_reading)

        # Append to final list
        full_readings_dict = {
            "electricity": (elt_meter_number, tmp_elt_dates, tmp_elt_readings),
            "gas": (gas_meter_number, tmp_gas_dates, tmp_gas_readings),
        }
        readings.append(full_readings_dict)

    return readings


def generate_past_date_with_variance(years_ago: int) -> datetime:
    """Return today's date shifted back by `years_ago` years, plus a random variance of ±50 days."""
    # Get current date (ignoring time)
    current_date = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)

    # Subtract the specified number of years
    past_date = current_date.replace(year=current_date.year - years_ago)

    # Generate a random number of days between -50 and 50
    days_variance = random.randint(-50, 50)

    # Apply the variance
    final_date = past_date + timedelta(days=days_variance)

    return final_date


def weighted_random_int(min_value: int = 1, max_value: int = 120) -> int:
    """Generate a random integer with a logarithmic distribution.

    This function produces random integers between min_value and max_value (inclusive),
    with a distribution skewed towards lower numbers. It uses a logarithmic
    transformation to achieve this weighted distribution.

    Parameters
    ----------
    min_value : int, optional
        The minimum value of the range (inclusive). Default is 1.
    max_value : int, optional
        The maximum value of the range (inclusive). Default is 120.

    Returns
    -------
    int
        A random integer between min_value and max_value, with a distribution
        skewed towards lower numbers.
    """
    r = random.random()

    # Apply a logarithmic transformation to skew towards lower numbers
    value = math.exp(r * math.log(max_value - min_value + 1)) + min_value - 1
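    # e.g. for min_value=1, max_value=120, r=0.5: exp(0.5 * ln(120)) ≈ 10.95 -> 10, so half of
    # all draws fall at or below 10 even though the range extends to 120.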
    # Round down to the nearest integer
    return int(math.floor(value))


def generate_meter_number() -> str:
    """Generate a random meter number in a specific format.

    This function creates a meter number string in the format "X.YYY.ZZZ.Q",
    where X and Q are single digits, and YYY and ZZZ are three-digit numbers.

    Returns
    -------
    str
        A randomly generated meter number string in the format "X.YYY.ZZZ.Q".
    """
    return (
        f"{random.randint(1, 9)}.{random.randint(100, 999)}."
        f"{random.randint(100, 999)}.{random.randint(1, 9)}"
    )


if __name__ == "__main__":
    # Generate data for the app

    # Number of customers to generate
    num_customers = 1000

    # Load base data file
    # NOTE: All of this information is publicly available; see the readme for the sources of
    # information that were used
    with open(DATA_DIR / "base_data.json", "r") as file:
        base_data = json.load(file)

    zip_results = base_data["zips"]

    # Create a population-weighted sample of customers
    df = pd.DataFrame(zip_results)
    df["weight"] = df["population"] / df["population"].sum()
    selected_zips = df.sample(n=num_customers, weights="weight", replace=True)

    # Generate customer names
    print("Generating customers data...")
    c_given_names = random.choices(base_data["given_names"], k=num_customers)
    c_surnames = random.choices(base_data["surnames"], k=num_customers)

    # Generate addresses
    print("Generating address data...")
    c_streets = random.choices(base_data["streets"], k=num_customers)

    # For street numbers, we just generate a random number between 1 and 120, weighted such that
    # lower numbers are more likely to occur
    house_numbers = [weighted_random_int(1, 120) for _ in range(num_customers)]

    # Finally, create meter readings
    print("Generating meter readings...")
    readings = generate_readings(num_customers)

    # Create a final list of customers and store as JSON
    print("Creating final JSON file...")
    customers = []
    for i in range(num_customers):
        customers.append(
            {
                "given_name": c_given_names[i],
                "surname": c_surnames[i],
                "street": c_streets[i],
                "house_number": house_numbers[i],
                "city": selected_zips.iloc[i]["city"],
                "zip_code": selected_zips.iloc[i]["zip_codes"],
                "longitude": float(selected_zips.iloc[i]["longitude"]),
                "latitude": float(selected_zips.iloc[i]["latitude"]),
                "readings_elt": readings[i]["electricity"],
                "readings_gas": readings[i]["gas"],
            }
        )

    with open(DATA_DIR / "customers.json", "w") as file:
        json.dump(customers, file, indent=4, ensure_ascii=False, cls=DateTimeEncoder)
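For orientation, one record in the resulting customers.json looks roughly like this (values are hypothetical; the reading tuples serialize as JSON arrays of meter number, ISO dates, and readings):

{
    "given_name": "Anna",
    "surname": "Schmidt",
    "street": "Hauptstraße",
    "house_number": 12,
    "city": "Helmstedt",
    "zip_code": "38350",
    "longitude": 11.01,
    "latitude": 52.23,
    "readings_elt": ["4.321.654.9", ["2023-08-30T00:00:00", "2024-09-05T00:00:00"], [41230, 44790]],
    "readings_gas": ["3.123.456.7", ["2023-09-02T00:00:00", "2024-09-01T00:00:00"], [25110, 26250]]
}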
data_preparation/get_base_data.py | 162 lines, new file
@@ -0,0 +1,162 @@
"""Get base data (Addresses, Names) to form the foundation of the customer generation process."""

import json
from pathlib import Path
from typing import Any, Dict, List, Union

import requests
from config import DATA_DIR


def read_file_to_list(file_path: Union[str, Path]) -> List[str]:
    """Read the contents of a file and return them as a list of strings.

    This function opens the specified file, reads all lines, and returns
    them as a list where each element is a line from the file.

    Parameters
    ----------
    file_path : str or Path
        The path to the file to be read.

    Returns
    -------
    List[str]
        A list containing the lines of the file as strings.
    """
    with open(file_path, "r") as file:
        return file.readlines()


def get_datalist(file_path: Path) -> List[str]:
    """Read a file and return its contents as a list of stripped strings.

    This function reads the contents of the specified file, removes leading
    and trailing whitespace from each line, and returns the resulting lines
    as a list of strings.

    Parameters
    ----------
    file_path : Path
        The path to the file to be read, as a pathlib.Path object.

    Returns
    -------
    List[str]
        A list containing the stripped lines of the file as strings.
    """
    data_list = read_file_to_list(file_path)
    for idx in range(len(data_list)):
        data_list[idx] = data_list[idx].strip()

    return data_list


def select_zips() -> Dict[str, List]:
    """Select and retrieve information about zip codes within the Avacon Netz area.

    This function reads a list of German zip codes, filters them based on
    geographical boundaries approximating the Avacon Netz coverage area,
    and retrieves additional information for each selected zip code using
    a public API.

    The function uses predefined latitude and longitude ranges to determine
    if a zip code falls within the Avacon Netz area.

    Returns
    -------
    Dict[str, List]
        A dictionary containing the following keys, each associated with a list:
        - 'latitude': Latitudes of selected zip codes
        - 'longitude': Longitudes of selected zip codes
        - 'population': Population counts for selected zip codes
        - 'city': City names for selected zip codes
        - 'zip_codes': Selected zip codes

    Notes
    -----
    - This function requires an internet connection to access the API.
    - The function assumes the existence of a global constant DATA_DIR
      pointing to the directory containing the 'zip_codes.txt' file.
    - The API used is 'https://gvz.tuerantuer.org/api/administrative_divisions/'.
    """
    lat_list = []
    lon_list = []
    pop_list = []
    selected_zips = []
    city_list = []

    # Get list of all zip codes in Germany
    zip_list = read_file_to_list(DATA_DIR / "zip_codes.txt")

    # Geographical settings:
    # The main idea is to roughly describe the Avacon Netz area, then use an API to go through all
    # zip codes in Germany and only keep those that are inside the longitude and latitude range.
    # We use the same API to get the actual latitude and longitude for each zip code and the total
    # population so that we can use this information to weight the sampling of customers.

    # Approximate northernmost point of Avacon Netz coverage near Geesthacht to southernmost point
    # in the south Harz mountains close to Ilfeld
    min_lat, max_lat = 51.618147, 53.432608
    # Approximate westernmost point of Avacon Netz coverage near Vechta to easternmost point near
    # Reuden/Anhalt
    min_lon, max_lon = 8.476919, 12.270417

    for zip_code in zip_list:
        zip_code = zip_code.strip()  # readlines() keeps trailing newlines
        if zip_code in selected_zips:
            continue

        # Get additional data by using this public API
        response = requests.get(
            f"https://gvz.tuerantuer.org/api/administrative_divisions/?search={zip_code}"
        )
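        # The response is expected to look roughly like this (illustrative, inferred from the
        # fields accessed below):
        #   {"count": 1, "results": [{"latitude": 52.2, "longitude": 11.0,
        #    "zip_codes": ["38350"], "citizens_total": 15000, "office_city": "Helmstedt"}]}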
        if response.status_code == 200:
            response_json = response.json()
            if response_json["count"] > 0:
                zip_data = response_json["results"][0]
                lat = zip_data["latitude"]
                lon = zip_data["longitude"]

                # Check if the zip code is within the Avacon Netz area
                if (min_lat <= lat <= max_lat) and (min_lon <= lon <= max_lon):
                    num_associated_zips = len(zip_data["zip_codes"])
                    selected_zips += zip_data["zip_codes"]  # Add any associated zip codes
                    lat_list += [lat] * num_associated_zips
                    lon_list += [lon] * num_associated_zips
                    pop_list += [zip_data["citizens_total"]] * num_associated_zips
                    city_list += [zip_data["office_city"]] * num_associated_zips

        else:
            print(f"No data found for {zip_code}")

    result = {
        "latitude": lat_list,
        "longitude": lon_list,
        "population": pop_list,
        "city": city_list,
        "zip_codes": selected_zips,
    }

    return result


if __name__ == "__main__":
    data: Dict[str, Union[list[Any], Dict[str, list[Any]]]] = {}

    # Get given names and surnames
    data["surnames"] = get_datalist(DATA_DIR / "names.txt")
    data["given_names"] = get_datalist(DATA_DIR / "female_gnames.txt") + get_datalist(
        DATA_DIR / "male_gnames.txt"
    )

    # Get list of street names
    data["streets"] = get_datalist(DATA_DIR / "street_names.txt")

    # Get list of all zip codes and select those in the Avacon Netz area.
    # Also include geographical and population data, which is needed for sampling later.
    data["zips"] = select_zips()

    # Save data to file
    with open(DATA_DIR / "base_data.json", "w") as file:
        json.dump(data, file, indent=4, ensure_ascii=False)
data_preparation/init_sql_schema.py | 111 lines, new file
@@ -0,0 +1,111 @@
"""Script to create the SQL schema in Azure SQL Database."""

import os

import pyodbc

schema_creation = """
CREATE TABLE Addresses (
    ID INT PRIMARY KEY IDENTITY(1,1),
    StreetName NVARCHAR(100),
    HouseNumber NVARCHAR(10),
    City NVARCHAR(50),
    PostalCode NVARCHAR(10),
    Longitude FLOAT,
    Latitude FLOAT
);

CREATE TABLE Meters (
    ID INT PRIMARY KEY IDENTITY(1,1),
    Signature NVARCHAR(11),
    MeterType NVARCHAR(3),
    AddressID INT,
    FOREIGN KEY (AddressID) REFERENCES Addresses(ID)
);

CREATE TABLE Customers (
    ID INT PRIMARY KEY IDENTITY(1,1),
    FirstName NVARCHAR(100),
    LastName NVARCHAR(100),
    GasMeterID INT,
    EltMeterID INT,
    FOREIGN KEY (GasMeterID) REFERENCES Meters(ID),
    FOREIGN KEY (EltMeterID) REFERENCES Meters(ID)
);

CREATE TABLE Readings (
    ID INT PRIMARY KEY IDENTITY(1,1),
    CustomerID INT,
    MeterID INT,
    ReadingDate DATE,
    ReadingValue INT,
    FOREIGN KEY (CustomerID) REFERENCES Customers(ID),
    FOREIGN KEY (MeterID) REFERENCES Meters(ID)
);
"""


def create_tables(cursor: pyodbc.Cursor, schema: str) -> None:
    """Create database tables based on the provided schema.

    This function takes a database schema as a string and executes each table
    creation statement separately. It handles cases where tables already exist
    and reports this information.

    Parameters
    ----------
    cursor : pyodbc.Cursor
        A pyodbc cursor object used to execute SQL commands.
    schema : str
        A string containing SQL statements for table creation, separated by
        semicolons.

    Returns
    -------
    None
        This function doesn't return any value.

    Raises
    ------
    pyodbc.ProgrammingError
        If there's an error in executing the SQL statements, other than
        "table already exists".

    Notes
    -----
    - The function assumes the existence of a global 'conn' object for
      committing changes to the database.
    - It prints a message if a table already exists instead of raising an error.
    """
    for table_creation in schema.split(";"):
        if table_creation.strip():
            try:
                cursor.execute(table_creation)
                conn.commit()
            except pyodbc.ProgrammingError as e:
                if "There is already an object named" in str(e):
                    print(f"Table already exists: {table_creation.split('(')[0].split()[-1]}")
                else:
                    raise


if __name__ == "__main__":
    # Connect to the database
    connection_string = os.environ.get("AZURE_SQL_CONNECTION_STRING")

    try:
        conn = pyodbc.connect(connection_string)
        cursor = conn.cursor()
        print("Connected to Azure SQL Database successfully!")

        # Create the schema
        create_tables(cursor, schema_creation)

    except pyodbc.Error as e:
        print(f"Error connecting to Azure SQL Database: {e}")
    finally:
        if "cursor" in locals():
            cursor.close()
        if "conn" in locals():
            conn.close()
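A quick verification sketch, assuming the same AZURE_SQL_CONNECTION_STRING environment variable: list the user tables that should now exist (Addresses, Meters, Customers, Readings).

import os

import pyodbc

conn = pyodbc.connect(os.environ["AZURE_SQL_CONNECTION_STRING"])
cursor = conn.cursor()
# INFORMATION_SCHEMA.TABLES lists all base tables visible to the connected user
cursor.execute("SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_TYPE = 'BASE TABLE';")
for (table_name,) in cursor.fetchall():
    print(table_name)
cursor.close()
conn.close()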
data_preparation/insert_sql.py | 214 lines, new file
@@ -0,0 +1,214 @@
"""Script to insert the preprocessed data into the Azure SQL Database."""

import json
import os
from datetime import datetime
from typing import List, Tuple

import pyodbc
from config import DATA_DIR


def insert_addresses(
    cursor: pyodbc.Cursor, addresses: List[Tuple[str, str, str, str, float, float]]
) -> None:
    """Insert multiple addresses into the Addresses table using pyodbc.

    Parameters
    ----------
    cursor : pyodbc.Cursor
        A pyodbc cursor object used to execute SQL commands.
    addresses : List[Tuple[str, str, str, str, float, float]]
        A list of tuples, where each tuple represents an address with the following elements:
        (StreetName, HouseNumber, City, PostalCode, Longitude, Latitude).

    Returns
    -------
    None
        This function doesn't return any value.
    """
    for address in addresses:
        cursor.execute(
            """
            INSERT INTO Addresses (StreetName, HouseNumber, City, PostalCode, Longitude, Latitude)
            VALUES (?, ?, ?, ?, ?, ?);
            """,
            address,
        )
    conn.commit()


def insert_meters(cursor: pyodbc.Cursor, meter_data: List[Tuple[str, str, int]]) -> None:
    """Insert multiple meters into the Meters table in a database using pyodbc.

    Parameters
    ----------
    cursor : pyodbc.Cursor
        A pyodbc cursor object used to execute SQL commands.
    meter_data : List[Tuple[str, str, int]]
        A list of tuples, where each tuple represents a meter with the following elements:
        (Signature, MeterType, AddressID).

    Returns
    -------
    None
        This function doesn't return any value.
    """
    for meter in meter_data:
        cursor.execute(
            """
            INSERT INTO Meters (Signature, MeterType, AddressID)
            VALUES (?, ?, ?);
            """,
            meter,
        )
    conn.commit()


def insert_customers(cursor: pyodbc.Cursor, customer_data: List[Tuple[str, str, int, int]]) -> None:
    """Insert multiple customers into the Customers table in a database using pyodbc.

    Parameters
    ----------
    cursor : pyodbc.Cursor
        A pyodbc cursor object used to execute SQL commands.
    customer_data : List[Tuple[str, str, int, int]]
        A list of tuples, where each tuple represents a customer with the following elements:
        (FirstName, LastName, GasMeterID, EltMeterID).

    Returns
    -------
    None
        This function doesn't return any value.
    """
    for customer in customer_data:
        cursor.execute(
            """
            INSERT INTO Customers (FirstName, LastName, GasMeterID, EltMeterID)
            VALUES (?, ?, ?, ?);
            """,
            customer,
        )
    conn.commit()


def insert_readings(
    cursor: pyodbc.Cursor, readings_data: List[Tuple[int, int, datetime, int]]
) -> None:
    """Insert multiple readings into the Readings table in a database using pyodbc.

    Parameters
    ----------
    cursor : pyodbc.Cursor
        A pyodbc cursor object used to execute SQL commands.
    readings_data : List[Tuple[int, int, datetime, int]]
        A list of tuples, where each tuple represents a reading with the following elements:
        (CustomerID, MeterID, ReadingDate, ReadingValue).

    Returns
    -------
    None
        This function doesn't return any value.
    """
    for reading in readings_data:
        cursor.execute(
            """
            INSERT INTO Readings (CustomerID, MeterID, ReadingDate, ReadingValue)
            VALUES (?, ?, ?, ?);
            """,
            reading,
        )
    conn.commit()


if __name__ == "__main__":
    conn_str = os.environ.get("AZURE_SQL_CONNECTION_STRING")
    try:
        conn = pyodbc.connect(conn_str)
        cursor = conn.cursor()
        print("Connected to Azure SQL Database successfully!")

    except pyodbc.Error as e:
        print(f"Error connecting to Azure SQL Database: {e}")
        raise SystemExit(1)  # Without a connection, the inserts below cannot run.

    # Load generated customer data
    with open(DATA_DIR / "customers.json", "r") as file:
        customers = json.load(file)

    # Insert data into the database, starting with addresses
    full_addr_list = []
    for c in customers:
        full_addr_list.append(
            (
                c["street"],
                c["house_number"],
                c["city"],
                c["zip_code"],
                c["longitude"],
                c["latitude"],
            )
        )

    unique_addresses = list(set(full_addr_list))

    address_ids = []
    for i in range(len(customers)):
        address_ids.append(unique_addresses.index(full_addr_list[i]) + 1)
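    # NOTE: list.index is a linear scan, so this ID lookup is O(n²) overall; a dict mapping
    # address tuple -> ID would make it linear, but this is fine at the current data size.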

    gas_meter_data = []
    elt_meter_data = []
    gas_readings_data = []
    elt_readings_data = []
    customer_data = []
    gas_id = 1
    elt_id = len(customers) + 1
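    # Meter IDs come from the IDENTITY column in insertion order: gas meters are inserted first
    # and get IDs 1..N, electricity meters follow with IDs N+1..2N.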
    for c_idx, c in enumerate(customers):
        # GasMeterID 1..N, EltMeterID N+1..2N (matching the meter insertion order below)
        customer_data.append(
            (c["given_name"], c["surname"], c_idx + 1, len(customers) + c_idx + 1)
        )
        for r in range(len(c["readings_gas"][1])):
            if r == 0:
                gas_meter_data.append((c["readings_gas"][0], "GAS", address_ids[c_idx]))
                elt_meter_data.append((c["readings_elt"][0], "ELT", address_ids[c_idx]))
            gas_readings_data.append(
                (
                    c_idx + 1,
                    gas_id,
                    datetime.fromisoformat(c["readings_gas"][1][r]),
                    c["readings_gas"][2][r],
                )
            )  # CustomerID, MeterID, ReadingDate, ReadingValue
            elt_readings_data.append(
                (
                    c_idx + 1,
                    elt_id,
                    datetime.fromisoformat(c["readings_elt"][1][r]),
                    c["readings_elt"][2][r],
                )
            )
        gas_id += 1
        elt_id += 1

    print("Inserting addresses into the database...")
    insert_addresses(cursor, unique_addresses)

    print("Inserting gas meter data into the database...")
    insert_meters(cursor, gas_meter_data)

    print("Inserting electricity meter data into the database...")
    insert_meters(cursor, elt_meter_data)

    print("Inserting customer data into the database...")
    insert_customers(cursor, customer_data)

    print("Inserting gas readings into the database...")
    insert_readings(cursor, gas_readings_data)

    print("Inserting electricity readings into the database...")
    insert_readings(cursor, elt_readings_data)

    print("Data successfully inserted into the database!")

    if "cursor" in locals():
        cursor.close()
    if "conn" in locals():
        conn.close()
data_preparation/test_sql_connection.py | 56 lines, new file
@@ -0,0 +1,56 @@
"""Script to test connection to Azure SQL Database."""

import os

import pyodbc


def test_connection() -> bool:
    """Test the connection to Azure SQL Database.

    This function attempts to establish a connection to an Azure SQL Database
    using a connection string stored in the environment variable
    'AZURE_SQL_CONNECTION_STRING'. It executes a simple query to verify
    the connection and prints the result of the connection attempt.

    Returns
    -------
    bool
        True if the connection was successful and closed properly,
        False if there was an error connecting to the database.

    Notes
    -----
    - The function requires the 'AZURE_SQL_CONNECTION_STRING' environment
      variable to be set with a valid connection string.
    - It uses pyodbc to establish the database connection.
    - The function prints success or error messages to stdout.
    - In case of a successful connection, it executes "SELECT @@version;"
      as a test query.
    - The function ensures that both the cursor and the connection are
      closed after the operation, regardless of its success or failure.
    """
    connection_string = os.environ.get("AZURE_SQL_CONNECTION_STRING")
    try:
        conn = pyodbc.connect(connection_string)
        cursor = conn.cursor()
        print("Connected to Azure SQL Database successfully!")

        # Example query
        cursor.execute("SELECT @@version;")

    except pyodbc.Error as e:
        print(f"Error connecting to Azure SQL Database: {e}")
        return False

    finally:
        if "cursor" in locals():
            cursor.close()
        if "conn" in locals():
            conn.close()

    return True


if __name__ == "__main__":
    test_connection()
poetry.lock | 198 lines, generated
@@ -802,6 +802,67 @@ files = [
    {file = "nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f"},
]

[[package]]
name = "numpy"
version = "2.1.0"
description = "Fundamental package for array computing in Python"
optional = false
python-versions = ">=3.10"
files = [
    {file = "numpy-2.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6326ab99b52fafdcdeccf602d6286191a79fe2fda0ae90573c5814cd2b0bc1b8"},
    {file = "numpy-2.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0937e54c09f7a9a68da6889362ddd2ff584c02d015ec92672c099b61555f8911"},
    {file = "numpy-2.1.0-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:30014b234f07b5fec20f4146f69e13cfb1e33ee9a18a1879a0142fbb00d47673"},
    {file = "numpy-2.1.0-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:899da829b362ade41e1e7eccad2cf274035e1cb36ba73034946fccd4afd8606b"},
    {file = "numpy-2.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:08801848a40aea24ce16c2ecde3b756f9ad756586fb2d13210939eb69b023f5b"},
    {file = "numpy-2.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:398049e237d1aae53d82a416dade04defed1a47f87d18d5bd615b6e7d7e41d1f"},
    {file = "numpy-2.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0abb3916a35d9090088a748636b2c06dc9a6542f99cd476979fb156a18192b84"},
    {file = "numpy-2.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:10e2350aea18d04832319aac0f887d5fcec1b36abd485d14f173e3e900b83e33"},
    {file = "numpy-2.1.0-cp310-cp310-win32.whl", hash = "sha256:f6b26e6c3b98adb648243670fddc8cab6ae17473f9dc58c51574af3e64d61211"},
    {file = "numpy-2.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:f505264735ee074250a9c78247ee8618292091d9d1fcc023290e9ac67e8f1afa"},
    {file = "numpy-2.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:76368c788ccb4f4782cf9c842b316140142b4cbf22ff8db82724e82fe1205dce"},
    {file = "numpy-2.1.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:f8e93a01a35be08d31ae33021e5268f157a2d60ebd643cfc15de6ab8e4722eb1"},
    {file = "numpy-2.1.0-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:9523f8b46485db6939bd069b28b642fec86c30909cea90ef550373787f79530e"},
    {file = "numpy-2.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:54139e0eb219f52f60656d163cbe67c31ede51d13236c950145473504fa208cb"},
    {file = "numpy-2.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5ebbf9fbdabed208d4ecd2e1dfd2c0741af2f876e7ae522c2537d404ca895c3"},
    {file = "numpy-2.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:378cb4f24c7d93066ee4103204f73ed046eb88f9ad5bb2275bb9fa0f6a02bd36"},
    {file = "numpy-2.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d8f699a709120b220dfe173f79c73cb2a2cab2c0b88dd59d7b49407d032b8ebd"},
    {file = "numpy-2.1.0-cp311-cp311-win32.whl", hash = "sha256:ffbd6faeb190aaf2b5e9024bac9622d2ee549b7ec89ef3a9373fa35313d44e0e"},
    {file = "numpy-2.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:0af3a5987f59d9c529c022c8c2a64805b339b7ef506509fba7d0556649b9714b"},
    {file = "numpy-2.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:fe76d75b345dc045acdbc006adcb197cc680754afd6c259de60d358d60c93736"},
    {file = "numpy-2.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f358ea9e47eb3c2d6eba121ab512dfff38a88db719c38d1e67349af210bc7529"},
    {file = "numpy-2.1.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:dd94ce596bda40a9618324547cfaaf6650b1a24f5390350142499aa4e34e53d1"},
    {file = "numpy-2.1.0-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:b47c551c6724960479cefd7353656498b86e7232429e3a41ab83be4da1b109e8"},
    {file = "numpy-2.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0756a179afa766ad7cb6f036de622e8a8f16ffdd55aa31f296c870b5679d745"},
    {file = "numpy-2.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24003ba8ff22ea29a8c306e61d316ac74111cebf942afbf692df65509a05f111"},
    {file = "numpy-2.1.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:b34fa5e3b5d6dc7e0a4243fa0f81367027cb6f4a7215a17852979634b5544ee0"},
    {file = "numpy-2.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c4f982715e65036c34897eb598d64aef15150c447be2cfc6643ec7a11af06574"},
    {file = "numpy-2.1.0-cp312-cp312-win32.whl", hash = "sha256:c4cd94dfefbefec3f8b544f61286584292d740e6e9d4677769bc76b8f41deb02"},
    {file = "numpy-2.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:a0cdef204199278f5c461a0bed6ed2e052998276e6d8ab2963d5b5c39a0500bc"},
    {file = "numpy-2.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8ab81ccd753859ab89e67199b9da62c543850f819993761c1e94a75a814ed667"},
    {file = "numpy-2.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:442596f01913656d579309edcd179a2a2f9977d9a14ff41d042475280fc7f34e"},
    {file = "numpy-2.1.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:848c6b5cad9898e4b9ef251b6f934fa34630371f2e916261070a4eb9092ffd33"},
    {file = "numpy-2.1.0-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:54c6a63e9d81efe64bfb7bcb0ec64332a87d0b87575f6009c8ba67ea6374770b"},
    {file = "numpy-2.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:652e92fc409e278abdd61e9505649e3938f6d04ce7ef1953f2ec598a50e7c195"},
    {file = "numpy-2.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ab32eb9170bf8ffcbb14f11613f4a0b108d3ffee0832457c5d4808233ba8977"},
    {file = "numpy-2.1.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:8fb49a0ba4d8f41198ae2d52118b050fd34dace4b8f3fb0ee34e23eb4ae775b1"},
    {file = "numpy-2.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:44e44973262dc3ae79e9063a1284a73e09d01b894b534a769732ccd46c28cc62"},
    {file = "numpy-2.1.0-cp313-cp313-win32.whl", hash = "sha256:ab83adc099ec62e044b1fbb3a05499fa1e99f6d53a1dde102b2d85eff66ed324"},
    {file = "numpy-2.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:de844aaa4815b78f6023832590d77da0e3b6805c644c33ce94a1e449f16d6ab5"},
    {file = "numpy-2.1.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:343e3e152bf5a087511cd325e3b7ecfd5b92d369e80e74c12cd87826e263ec06"},
    {file = "numpy-2.1.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f07fa2f15dabe91259828ce7d71b5ca9e2eb7c8c26baa822c825ce43552f4883"},
    {file = "numpy-2.1.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:5474dad8c86ee9ba9bb776f4b99ef2d41b3b8f4e0d199d4f7304728ed34d0300"},
    {file = "numpy-2.1.0-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:1f817c71683fd1bb5cff1529a1d085a57f02ccd2ebc5cd2c566f9a01118e3b7d"},
    {file = "numpy-2.1.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3a3336fbfa0d38d3deacd3fe7f3d07e13597f29c13abf4d15c3b6dc2291cbbdd"},
    {file = "numpy-2.1.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a894c51fd8c4e834f00ac742abad73fc485df1062f1b875661a3c1e1fb1c2f6"},
    {file = "numpy-2.1.0-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:9156ca1f79fc4acc226696e95bfcc2b486f165a6a59ebe22b2c1f82ab190384a"},
    {file = "numpy-2.1.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:624884b572dff8ca8f60fab591413f077471de64e376b17d291b19f56504b2bb"},
    {file = "numpy-2.1.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:15ef8b2177eeb7e37dd5ef4016f30b7659c57c2c0b57a779f1d537ff33a72c7b"},
    {file = "numpy-2.1.0-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:e5f0642cdf4636198a4990de7a71b693d824c56a757862230454629cf62e323d"},
    {file = "numpy-2.1.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f15976718c004466406342789f31b6673776360f3b1e3c575f25302d7e789575"},
    {file = "numpy-2.1.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:6c1de77ded79fef664d5098a66810d4d27ca0224e9051906e634b3f7ead134c2"},
    {file = "numpy-2.1.0.tar.gz", hash = "sha256:7dc90da0081f7e1da49ec4e398ede6a8e9cc4f5ebe5f9e06b443ed889ee9aaa2"},
]

[[package]]
name = "numpydoc"
version = "1.8.0"
@@ -866,6 +927,79 @@ files = [
dev = ["pytest", "tox"]
lint = ["black"]

[[package]]
name = "pandas"
version = "2.2.2"
description = "Powerful data structures for data analysis, time series, and statistics"
optional = false
python-versions = ">=3.9"
files = [
    {file = "pandas-2.2.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:90c6fca2acf139569e74e8781709dccb6fe25940488755716d1d354d6bc58bce"},
    {file = "pandas-2.2.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c7adfc142dac335d8c1e0dcbd37eb8617eac386596eb9e1a1b77791cf2498238"},
    {file = "pandas-2.2.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4abfe0be0d7221be4f12552995e58723c7422c80a659da13ca382697de830c08"},
    {file = "pandas-2.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8635c16bf3d99040fdf3ca3db669a7250ddf49c55dc4aa8fe0ae0fa8d6dcc1f0"},
    {file = "pandas-2.2.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:40ae1dffb3967a52203105a077415a86044a2bea011b5f321c6aa64b379a3f51"},
    {file = "pandas-2.2.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8e5a0b00e1e56a842f922e7fae8ae4077aee4af0acb5ae3622bd4b4c30aedf99"},
    {file = "pandas-2.2.2-cp310-cp310-win_amd64.whl", hash = "sha256:ddf818e4e6c7c6f4f7c8a12709696d193976b591cc7dc50588d3d1a6b5dc8772"},
    {file = "pandas-2.2.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:696039430f7a562b74fa45f540aca068ea85fa34c244d0deee539cb6d70aa288"},
    {file = "pandas-2.2.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8e90497254aacacbc4ea6ae5e7a8cd75629d6ad2b30025a4a8b09aa4faf55151"},
    {file = "pandas-2.2.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58b84b91b0b9f4bafac2a0ac55002280c094dfc6402402332c0913a59654ab2b"},
    {file = "pandas-2.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d2123dc9ad6a814bcdea0f099885276b31b24f7edf40f6cdbc0912672e22eee"},
    {file = "pandas-2.2.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:2925720037f06e89af896c70bca73459d7e6a4be96f9de79e2d440bd499fe0db"},
    {file = "pandas-2.2.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0cace394b6ea70c01ca1595f839cf193df35d1575986e484ad35c4aeae7266c1"},
    {file = "pandas-2.2.2-cp311-cp311-win_amd64.whl", hash = "sha256:873d13d177501a28b2756375d59816c365e42ed8417b41665f346289adc68d24"},
    {file = "pandas-2.2.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:9dfde2a0ddef507a631dc9dc4af6a9489d5e2e740e226ad426a05cabfbd7c8ef"},
    {file = "pandas-2.2.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e9b79011ff7a0f4b1d6da6a61aa1aa604fb312d6647de5bad20013682d1429ce"},
    {file = "pandas-2.2.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1cb51fe389360f3b5a4d57dbd2848a5f033350336ca3b340d1c53a1fad33bcad"},
    {file = "pandas-2.2.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eee3a87076c0756de40b05c5e9a6069c035ba43e8dd71c379e68cab2c20f16ad"},
    {file = "pandas-2.2.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:3e374f59e440d4ab45ca2fffde54b81ac3834cf5ae2cdfa69c90bc03bde04d76"},
    {file = "pandas-2.2.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:43498c0bdb43d55cb162cdc8c06fac328ccb5d2eabe3cadeb3529ae6f0517c32"},
    {file = "pandas-2.2.2-cp312-cp312-win_amd64.whl", hash = "sha256:d187d355ecec3629624fccb01d104da7d7f391db0311145817525281e2804d23"},
    {file = "pandas-2.2.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0ca6377b8fca51815f382bd0b697a0814c8bda55115678cbc94c30aacbb6eff2"},
    {file = "pandas-2.2.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9057e6aa78a584bc93a13f0a9bf7e753a5e9770a30b4d758b8d5f2a62a9433cd"},
    {file = "pandas-2.2.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:001910ad31abc7bf06f49dcc903755d2f7f3a9186c0c040b827e522e9cef0863"},
    {file = "pandas-2.2.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:66b479b0bd07204e37583c191535505410daa8df638fd8e75ae1b383851fe921"},
    {file = "pandas-2.2.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a77e9d1c386196879aa5eb712e77461aaee433e54c68cf253053a73b7e49c33a"},
    {file = "pandas-2.2.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:92fd6b027924a7e178ac202cfbe25e53368db90d56872d20ffae94b96c7acc57"},
    {file = "pandas-2.2.2-cp39-cp39-win_amd64.whl", hash = "sha256:640cef9aa381b60e296db324337a554aeeb883ead99dc8f6c18e81a93942f5f4"},
    {file = "pandas-2.2.2.tar.gz", hash = "sha256:9e79019aba43cb4fda9e4d983f8e88ca0373adbb697ae9c6c43093218de28b54"},
]

[package.dependencies]
numpy = [
    {version = ">=1.22.4", markers = "python_version < \"3.11\""},
    {version = ">=1.23.2", markers = "python_version == \"3.11\""},
    {version = ">=1.26.0", markers = "python_version >= \"3.12\""},
]
python-dateutil = ">=2.8.2"
pytz = ">=2020.1"
tzdata = ">=2022.7"

[package.extras]
all = ["PyQt5 (>=5.15.9)", "SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)", "beautifulsoup4 (>=4.11.2)", "bottleneck (>=1.3.6)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=2022.12.0)", "fsspec (>=2022.11.0)", "gcsfs (>=2022.11.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.9.2)", "matplotlib (>=3.6.3)", "numba (>=0.56.4)", "numexpr (>=2.8.4)", "odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "pandas-gbq (>=0.19.0)", "psycopg2 (>=2.9.6)", "pyarrow (>=10.0.1)", "pymysql (>=1.0.2)", "pyreadstat (>=1.2.0)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "qtpy (>=2.3.0)", "s3fs (>=2022.11.0)", "scipy (>=1.10.0)", "tables (>=3.8.0)", "tabulate (>=0.9.0)", "xarray (>=2022.12.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)", "zstandard (>=0.19.0)"]
aws = ["s3fs (>=2022.11.0)"]
clipboard = ["PyQt5 (>=5.15.9)", "qtpy (>=2.3.0)"]
compression = ["zstandard (>=0.19.0)"]
computation = ["scipy (>=1.10.0)", "xarray (>=2022.12.0)"]
consortium-standard = ["dataframe-api-compat (>=0.1.7)"]
excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)"]
feather = ["pyarrow (>=10.0.1)"]
fss = ["fsspec (>=2022.11.0)"]
gcp = ["gcsfs (>=2022.11.0)", "pandas-gbq (>=0.19.0)"]
hdf5 = ["tables (>=3.8.0)"]
html = ["beautifulsoup4 (>=4.11.2)", "html5lib (>=1.1)", "lxml (>=4.9.2)"]
mysql = ["SQLAlchemy (>=2.0.0)", "pymysql (>=1.0.2)"]
output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.9.0)"]
parquet = ["pyarrow (>=10.0.1)"]
performance = ["bottleneck (>=1.3.6)", "numba (>=0.56.4)", "numexpr (>=2.8.4)"]
plot = ["matplotlib (>=3.6.3)"]
postgresql = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "psycopg2 (>=2.9.6)"]
pyarrow = ["pyarrow (>=10.0.1)"]
spss = ["pyreadstat (>=1.2.0)"]
sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)"]
test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"]
xml = ["lxml (>=4.9.2)"]

[[package]]
name = "pathspec"
version = "0.12.1"
@@ -995,6 +1129,46 @@ pyyaml = "*"
[package.extras]
extra = ["pygments (>=2.12)"]

[[package]]
name = "pyodbc"
version = "5.1.0"
description = "DB API module for ODBC"
optional = false
python-versions = ">=3.8"
files = [
    {file = "pyodbc-5.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:02fe9821711a2d14415eaeb4deab471d2c8b7034b107e524e414c0e133c42248"},
    {file = "pyodbc-5.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2cbdbd019756285dc44bc35238a3ed8dfaa454e8c8b2c3462f1710cfeebfb290"},
    {file = "pyodbc-5.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:84df3bbce9bafe65abd25788d55c9f1da304f6115d70f25758ff8c85f3ce0517"},
    {file = "pyodbc-5.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:218bb75d4bc67075529a65ce8ec7daeed1d83c33dd7410450fbf68d43d184d28"},
    {file = "pyodbc-5.1.0-cp310-cp310-win32.whl", hash = "sha256:eae576b3b67d21d6f237e18bb5f3df8323a2258f52c3e3afeef79269704072a9"},
    {file = "pyodbc-5.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:c3b65343557f4c7753204e06f4c82c97ed212a636501f4bc27c5ce0e549eb3e8"},
    {file = "pyodbc-5.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:aa6f46377da303bf79bcb4b559899507df4b2559f30dcfdf191358ee4b99f3ab"},
    {file = "pyodbc-5.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b19d7f44cfee89901e482f554a88177e83fae76b03c3f830e0023a195d840220"},
    {file = "pyodbc-5.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c36448322f8d6479d87c528cf52401a6ea4f509b9637750b67340382b4e1b40"},
    {file = "pyodbc-5.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c5e0cb79222aad4b31a3602e39b242683c29c6221a16ed43f45f18fd0b73659"},
    {file = "pyodbc-5.1.0-cp311-cp311-win32.whl", hash = "sha256:92caed9d445815ed3f7e5a1249e29a4600ebc1e99404df81b6ed7671074c9227"},
    {file = "pyodbc-5.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:a1bd14633e91b7a9814f4fd944c9ebb89fb7f1fd4710c4e3999b5ef041536347"},
    {file = "pyodbc-5.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d3d9cc4af703c4817b6e604315910b0cf5dcb68056d52b25ca072dd59c52dcbc"},
    {file = "pyodbc-5.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:406b8fa2133a7b6a713aa5187dba2d08cf763b5884606bed77610a7660fdfabe"},
    {file = "pyodbc-5.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f8488c3818f12207650836c5c6f7352f9ff9f56a05a05512145995e497c0bbb1"},
    {file = "pyodbc-5.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b0df69e3a500791b70b5748c68a79483b24428e4c16027b56aa0305e95c143a4"},
    {file = "pyodbc-5.1.0-cp312-cp312-win32.whl", hash = "sha256:aa4e02d3a9bf819394510b726b25f1566f8b3f0891ca400ad2d4c8b86b535b78"},
    {file = "pyodbc-5.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:33f4984af38872e7bdec78007a34e4d43ae72bf9d0bae3344e79d9d0db157c0e"},
    {file = "pyodbc-5.1.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:29425e2d366e7f5828b76c7993f412a3db4f18bd5bcee00186c00b5a5965e205"},
    {file = "pyodbc-5.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a2bbd2e75c77dee9f3cd100c3246110abaeb9af3f7fa304ccc2934ff9c6a4fa4"},
    {file = "pyodbc-5.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3602136a936bc0c1bb9722eb2fbf2042b3ff1ddccdc4688e514b82d4b831563b"},
    {file = "pyodbc-5.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bed1c843565d3a4fd8c332ebceaf33efe817657a0505eacb97dd1b786a985b0b"},
    {file = "pyodbc-5.1.0-cp38-cp38-win32.whl", hash = "sha256:735f6da3762e5856b5580be0ed96bb946948346ebd1e526d5169a5513626a67a"},
    {file = "pyodbc-5.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:c5bb4e43f6c72f5fa2c634570e0d761767d8ea49f39205229b812fb4d3fe05aa"},
    {file = "pyodbc-5.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:33f0f1d7764cefef6f787936bd6359670828a6086be67518ab951f1f7f503cda"},
    {file = "pyodbc-5.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:be3b1c36c31ec7d73d0b34a8ad8743573763fadd8f2bceef1e84408252b48dce"},
    {file = "pyodbc-5.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e71a51c252b503b4d753e21ed31e640015fc0d00202d42ea42f2396fcc924b4a"},
    {file = "pyodbc-5.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:af5282cc8b667af97d76f4955250619a53f25486cbb6b1f45a06b781006ffa0b"},
    {file = "pyodbc-5.1.0-cp39-cp39-win32.whl", hash = "sha256:96b2a8dc27693a517e3aad3944a7faa8be95d40d7ec1eda51a1885162eedfa33"},
    {file = "pyodbc-5.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:e738c5eedb4a0cbab20cc008882f49b106054499db56864057c2530ff208cf32"},
    {file = "pyodbc-5.1.0.tar.gz", hash = "sha256:397feee44561a6580be08cedbe986436859563f4bb378f48224655c8e987ea60"},
]

[[package]]
name = "pytest"
version = "8.3.2"
@@ -1045,6 +1219,17 @@ files = [
[package.extras]
cli = ["click (>=5.0)"]

[[package]]
name = "pytz"
version = "2024.1"
description = "World timezone definitions, modern and historical"
optional = false
python-versions = "*"
files = [
    {file = "pytz-2024.1-py2.py3-none-any.whl", hash = "sha256:328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319"},
    {file = "pytz-2024.1.tar.gz", hash = "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812"},
]

[[package]]
name = "pyyaml"
version = "6.0.2"
@@ -1480,6 +1665,17 @@ files = [
    {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"},
]

[[package]]
name = "tzdata"
version = "2024.1"
description = "Provider of IANA time zone data"
optional = false
python-versions = ">=2"
files = [
    {file = "tzdata-2024.1-py2.py3-none-any.whl", hash = "sha256:9068bc196136463f5245e51efda838afa15aaeca9903f49050dfa2679db4d252"},
    {file = "tzdata-2024.1.tar.gz", hash = "sha256:2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd"},
]

[[package]]
name = "urllib3"
version = "2.2.2"
@@ -1598,4 +1794,4 @@ type = ["pytest-mypy"]
[metadata]
lock-version = "2.0"
python-versions = "^3.10"
-content-hash = "1ace87aaeab8e9964d3f2e843eb5a0d15c1506bb0d1871dc41a9a38f5ed3a352"
+content-hash = "3f76ad143c2f6e35163d1bc8d123b97b9bf2a96f86914d240e0c7da2226f1d85"

pyproject.toml
@@ -32,6 +32,8 @@ python = "^3.10"
plotly = "^5.23.0"
dash = "^2.17.1"
gunicorn = "^23.0.0"
+pyodbc = "^5.1.0"
+pandas = "^2.2.2"

[tool.poetry.group.docs.dependencies]
mkdocs = "^1.6.0"