feat(add-data): Add script to insert prepped data into database
The script `insert_sql.py` uses `pyodbc` to connect to the Azure SQL database, loads the data from the preprocessed `customers.json` file, formats it, and then inserts it into the tables of the previously created schema.
This commit is contained in:
214
data_preparation/insert_sql.py
Normal file
214
data_preparation/insert_sql.py
Normal file
@@ -0,0 +1,214 @@
|
||||
"""Script to insert the preprocessed data into the Azure SQL Database."""
|
||||
|
||||
import json
|
||||
import os
|
||||
from datetime import datetime
|
||||
from typing import List, Tuple
|
||||
|
||||
import pyodbc
|
||||
from config import DATA_DIR
|
||||
|
||||
|
||||
def insert_addresses(
    cursor: pyodbc.Cursor, addresses: List[Tuple[str, str, str, str, float, float]]
) -> None:
    """Insert multiple addresses into the Addresses table using pyodbc.

    Parameters
    ----------
    cursor : pyodbc.Cursor
        A pyodbc cursor object used to execute SQL commands. The inserted rows
        are committed through ``cursor.connection``.
    addresses : List[Tuple[str, str, str, str, float, float]]
        A list of tuples, where each tuple represents an address with the following elements:
        (StreetName, HouseNumber, City, PostalCode, Longitude, Latitude).

    Returns
    -------
    None
        This function doesn't return any value.
    """
    for address in addresses:
        cursor.execute(
            """
            INSERT INTO Addresses (StreetName, HouseNumber, City, PostalCode, Longitude, Latitude)
            VALUES (?, ?, ?, ?, ?, ?);
            """,
            address,
        )
    # Commit via the cursor's own connection instead of a module-level `conn`,
    # so the function also works when it is imported and called from elsewhere.
    cursor.connection.commit()
|
||||
|
||||
|
||||
def insert_meters(cursor: pyodbc.Cursor, meter_data: List[Tuple[str, str, int]]) -> None:
    """Insert multiple meters into the Meters table in a database using pyodbc.

    Parameters
    ----------
    cursor : pyodbc.Cursor
        A pyodbc cursor object used to execute SQL commands. The inserted rows
        are committed through ``cursor.connection``.
    meter_data : List[Tuple[str, str, int]]
        A list of tuples, where each tuple represents a meter with the following elements:
        (Signature, MeterType, AddressID).

    Returns
    -------
    None
        This function doesn't return any value.
    """
    for meter in meter_data:
        cursor.execute(
            """
            INSERT INTO Meters (Signature, MeterType, AddressID)
            VALUES (?, ?, ?);
            """,
            meter,
        )
    # Commit via the cursor's own connection instead of a module-level `conn`,
    # so the function also works when it is imported and called from elsewhere.
    cursor.connection.commit()
|
||||
|
||||
|
||||
def insert_customers(cursor: pyodbc.Cursor, customer_data: List[Tuple[str, str, int, int]]) -> None:
    """Insert multiple customers into the Customers table in a database using pyodbc.

    Parameters
    ----------
    cursor : pyodbc.Cursor
        A pyodbc cursor object used to execute SQL commands. The inserted rows
        are committed through ``cursor.connection``.
    customer_data : List[Tuple[str, str, int, int]]
        A list of tuples, where each tuple represents a customer with the following elements:
        (FirstName, LastName, GasMeterID, EltMeterID).

    Returns
    -------
    None
        This function doesn't return any value.
    """
    for customer in customer_data:
        cursor.execute(
            """
            INSERT INTO Customers (FirstName, LastName, GasMeterID, EltMeterID)
            VALUES (?, ?, ?, ?);
            """,
            customer,
        )
    # Commit via the cursor's own connection instead of a module-level `conn`,
    # so the function also works when it is imported and called from elsewhere.
    cursor.connection.commit()
|
||||
|
||||
|
||||
def insert_readings(
    cursor: pyodbc.Cursor, readings_data: List[Tuple[int, int, datetime, int]]
) -> None:
    """Insert multiple readings into the Readings table in a database using pyodbc.

    Parameters
    ----------
    cursor : pyodbc.Cursor
        A pyodbc cursor object used to execute SQL commands. The inserted rows
        are committed through ``cursor.connection``.
    readings_data : List[Tuple[int, int, datetime, int]]
        A list of tuples, where each tuple represents a reading with the following elements:
        (CustomerID, MeterID, ReadingDate, ReadingValue).

    Returns
    -------
    None
        This function doesn't return any value.
    """
    for reading in readings_data:
        cursor.execute(
            """
            INSERT INTO Readings (CustomerID, MeterID, ReadingDate, ReadingValue)
            VALUES (?, ?, ?, ?);
            """,
            reading,
        )
    # Commit via the cursor's own connection instead of a module-level `conn`,
    # so the function also works when it is imported and called from elsewhere.
    cursor.connection.commit()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
conn_str = os.environ.get("AZURE_SQL_CONNECTION_STRING")
|
||||
try:
|
||||
conn = pyodbc.connect(conn_str)
|
||||
cursor = conn.cursor()
|
||||
print("Connected to Azure SQL Database successfully!")
|
||||
|
||||
except pyodbc.Error as e:
|
||||
print(f"Error connecting to Azure SQL Database: {e}")
|
||||
|
||||
# Load generated customer data
|
||||
with open(DATA_DIR / "customers.json", "r") as file:
|
||||
customers = json.load(file)
|
||||
|
||||
# Insert data into the database
|
||||
# start with addresses
|
||||
full_addr_list = []
|
||||
for c in customers:
|
||||
full_addr_list.append(
|
||||
(
|
||||
c["street"],
|
||||
c["house_number"],
|
||||
c["city"],
|
||||
c["zip_code"],
|
||||
c["longitude"],
|
||||
c["latitude"],
|
||||
)
|
||||
)
|
||||
|
||||
unique_addresses = list(set(full_addr_list))
|
||||
|
||||
address_ids = []
|
||||
for i in range(len(customers)):
|
||||
address_ids.append(unique_addresses.index(full_addr_list[i]) + 1)
|
||||
|
||||
gas_meter_data = []
|
||||
elt_meter_data = []
|
||||
gas_readings_data = []
|
||||
elt_readings_data = []
|
||||
customer_data = []
|
||||
gas_id = 1
|
||||
elt_id = len(customers) + 1
|
||||
for c_idx, c in enumerate(customers):
|
||||
customer_data.append((c["given_name"], c["surname"], c_idx + 1, c_idx + 1))
|
||||
for r in range(len(c["readings_gas"][1])):
|
||||
if r == 0:
|
||||
gas_meter_data.append((c["readings_gas"][0], "GAS", address_ids[c_idx]))
|
||||
elt_meter_data.append((c["readings_elt"][0], "ELT", address_ids[c_idx]))
|
||||
gas_readings_data.append(
|
||||
(
|
||||
c_idx + 1,
|
||||
gas_id,
|
||||
datetime.fromisoformat(c["readings_gas"][1][r]),
|
||||
c["readings_gas"][2][r],
|
||||
)
|
||||
) # CustomerID, MeterID, ReadingDate, ReadingValue
|
||||
elt_readings_data.append(
|
||||
(
|
||||
c_idx + 1,
|
||||
elt_id,
|
||||
datetime.fromisoformat(c["readings_elt"][1][r]),
|
||||
c["readings_elt"][2][r],
|
||||
)
|
||||
)
|
||||
gas_id += 1
|
||||
elt_id += 1
|
||||
|
||||
print("Inserting addresses into the database...")
|
||||
insert_addresses(cursor, unique_addresses)
|
||||
|
||||
print("Inserting gas meter data into the database...")
|
||||
insert_meters(cursor, gas_meter_data)
|
||||
|
||||
print("Inserting electricity meter data into the database...")
|
||||
insert_meters(cursor, elt_meter_data)
|
||||
|
||||
print("Inserting customer data into the database...")
|
||||
insert_customers(cursor, customer_data)
|
||||
|
||||
print("Inserting gas readings into the database...")
|
||||
insert_readings(cursor, gas_readings_data)
|
||||
|
||||
print("Inserting electricity readings into the database...")
|
||||
insert_readings(cursor, elt_readings_data)
|
||||
|
||||
print("Data successfully inserted into the database!")
|
||||
|
||||
if "cursor" in locals():
|
||||
cursor.close()
|
||||
if "conn" in locals():
|
||||
conn.close()
|
||||
Reference in New Issue
Block a user