Merge pull request 'feat/add-data' (#3) from feat/add-data into main
Reviewed-on: #3
data/base_data.json    | 25719 lines, new file (diff suppressed: too large)
data/customers.json    | 45094 lines, new file (diff suppressed: too large)
data/female_gnames.txt |  1003 lines, new file (diff suppressed: too large)
data/male_gnames.txt   |  1003 lines, new file (diff suppressed: too large)
data/names.txt         |  3421 lines, new file (diff suppressed: too large)
data/street_names.txt  |  1157 lines, new file (diff suppressed: too large)
data/zip_codes.txt     |  8168 lines, new file (diff suppressed: too large)
data_preparation/config.py | 7 lines, new file
@@ -0,0 +1,7 @@
"""Global configuration for data preprocessing."""

from pathlib import Path

BASE_DIR = Path(__file__).resolve().parent.parent
DATA_DIR = BASE_DIR / "data"
APP_DIR = BASE_DIR / "app"
data_preparation/data_utils.py | 34 lines, new file
@@ -0,0 +1,34 @@
import json
from datetime import datetime
from typing import Any


class DateTimeEncoder(json.JSONEncoder):
    """A custom JSON encoder that handles datetime objects.

    This encoder extends the json.JSONEncoder class to provide serialization
    support for datetime objects, converting them to ISO format strings.

    Methods
    -------
    default(obj: Any) -> str
        Encode datetime objects as ISO format strings.
    """

    def default(self, obj: Any) -> str:
        """Encode the given object as a JSON-serializable type.

        Parameters
        ----------
        obj : Any
            The object to encode.

        Returns
        -------
        str
            The ISO format string if the object is a datetime, otherwise
            delegates to the superclass encoder.
        """
        if isinstance(obj, datetime):
            return obj.isoformat()
        return super().default(obj)
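A minimal usage sketch for the encoder, assuming data_utils.py is on the import path (as it is for the scripts in this directory):

import json
from datetime import datetime

from data_utils import DateTimeEncoder

# datetime values are serialized as ISO 8601 strings instead of raising a TypeError
payload = {"meter": "3.123.456.7", "read_at": datetime(2024, 8, 30, 12, 0)}
print(json.dumps(payload, cls=DateTimeEncoder))
# -> {"meter": "3.123.456.7", "read_at": "2024-08-30T12:00:00"}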
data_preparation/generate_customers.py | 222 lines, new file
@@ -0,0 +1,222 @@
"""Module to randomly generate a JSON file with customer data for the Avacon app."""

import json
import math
import random
from datetime import datetime, timedelta
from typing import Dict, List, Tuple

import pandas as pd
from config import DATA_DIR
from data_utils import DateTimeEncoder


def generate_readings(num_customers: int) -> List[Dict[str, Tuple[str, List[datetime], List[int]]]]:
    """Generate simulated meter readings for a specified number of customers.

    This function creates synthetic data for both natural gas and electricity
    meter readings. It simulates readings based on average consumption patterns
    in Germany, applying random variations to model real-world scenarios.

    Parameters
    ----------
    num_customers : int
        The number of customers for which to generate meter readings.

    Returns
    -------
    List[Dict[str, Tuple[str, List[datetime], List[int]]]]
        A list of dictionaries, where each dictionary represents a customer and
        contains:
        - 'electricity': A tuple of (meter number, list of reading dates, list of readings)
        - 'gas': A tuple of (meter number, list of reading dates, list of readings)
    """
    # NOTE: Of course, natural gas and electricity consumption depend on various factors, such as
    # the size of the household, the age of the building, insulation, etc. For the sake of this
    # example, we simply take the known average value and sample from a Gaussian distribution
    # around this value, with a standard deviation of 10% of the mean.
    # For the mean we assumed the average size of a flat in Germany (90 m²) and the average
    # consumption of 140 kWh/m², which we need to convert to m³ for the meter reading. To do this,
    # we use the calorific value of natural gas, assumed to be around 11.215 kWh/m³. We also need
    # to account for slight pressure differences using the conversion factor ("Zustandszahl").
    mean_natural_gas = 12600  # in kWh
    calorific_value = 11.215  # in kWh/m³
    conversion_factor = 0.9692

    mean_cubic = mean_natural_gas / (calorific_value * conversion_factor)
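    # With these defaults: 12600 / (11.215 * 0.9692) ≈ 1159 m³ of gas per year on average.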

    # For electricity, we take the average consumption of a 2-person household in Germany.
    mean_electricity = 3500  # in kWh

    readings = []
    # For each customer, generate between 1 and 10 readings (we assume that natural gas and
    # electricity are always read at the same time)
    for _ in range(num_customers):
        # The initial reading of the customer's meter
        gas_reading = random.randint(1000, 60000)
        elt_reading = random.randint(1000, 600_000)

        # Create an Avacon-style meter number
        gas_meter_number = generate_meter_number()
        elt_meter_number = generate_meter_number()

        num_readings = random.randint(1, 10)

        # Get initial timestamp: Assuming that each reading takes place once a year around a
        # similar date, we just take today's date and subtract a number of years corresponding
        # to the number of readings
        init_date = generate_past_date_with_variance(num_readings)
        tmp_gas_dates: list[datetime] = []
        tmp_elt_dates: list[datetime] = []
        tmp_gas_readings = []
        tmp_elt_readings = []
        for j in range(num_readings):
            time_diff = 0
            if j > 0:
                time_diff = 365 + random.randint(-50, 50)

            gas_date = tmp_gas_dates[-1] + timedelta(days=time_diff) if j > 0 else init_date

            # Electricity is read around a similar date as natural gas
            elt_date = gas_date + timedelta(days=random.randint(-10, 10))

            # Generate random readings
            gas_reading += int(random.gauss(mean_cubic, mean_cubic * 0.1))
            elt_reading += int(random.gauss(mean_electricity, mean_electricity * 0.1))

            # Append to temporary lists
            tmp_gas_dates.append(gas_date)
            tmp_elt_dates.append(elt_date)
            tmp_gas_readings.append(gas_reading)
            tmp_elt_readings.append(elt_reading)

        # Append to final list
        full_readings_dict = {
            "electricity": (elt_meter_number, tmp_elt_dates, tmp_elt_readings),
            "gas": (gas_meter_number, tmp_gas_dates, tmp_gas_readings),
        }
        readings.append(full_readings_dict)

    return readings


def generate_past_date_with_variance(years_ago: int) -> datetime:
    """Return today's date shifted back by `years_ago` years, plus a random variance of ±50 days."""
    # Get current date (ignoring time)
    current_date = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)

    # Subtract the specified number of years
    past_date = current_date.replace(year=current_date.year - years_ago)

    # Generate a random number of days between -50 and 50
    days_variance = random.randint(-50, 50)

    # Apply the variance
    final_date = past_date + timedelta(days=days_variance)

    return final_date


def weighted_random_int(min_value: int = 1, max_value: int = 120) -> int:
    """Generate a random integer with a logarithmic distribution.

    This function produces random integers between min_value and max_value (inclusive),
    with a distribution skewed towards lower numbers. It uses a logarithmic
    transformation to achieve this weighted distribution.

    Parameters
    ----------
    min_value : int, optional
        The minimum value of the range (inclusive). Default is 1.
    max_value : int, optional
        The maximum value of the range (inclusive). Default is 120.

    Returns
    -------
    int
        A random integer between min_value and max_value, with a distribution
        skewed towards lower numbers.
    """
    r = random.random()

    # Apply a logarithmic transformation to skew towards lower numbers
    value = math.exp(r * math.log(max_value - min_value + 1)) + min_value - 1
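    # e.g. for min_value=1, max_value=120, r=0.5: exp(0.5 * ln(120)) ≈ 10.95 -> 10, so half of
    # all draws fall at or below 10 even though the range extends to 120.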
    # Round down to the nearest integer
    return int(math.floor(value))


def generate_meter_number() -> str:
    """Generate a random meter number in a specific format.

    This function creates a meter number string in the format "X.YYY.ZZZ.Q",
    where X and Q are single digits, and YYY and ZZZ are three-digit numbers.

    Returns
    -------
    str
        A randomly generated meter number string in the format "X.YYY.ZZZ.Q".
    """
    return (
        f"{random.randint(1, 9)}.{random.randint(100, 999)}."
        f"{random.randint(100, 999)}.{random.randint(1, 9)}"
    )


if __name__ == "__main__":
    # Generate data for the app

    # Number of customers to generate
    num_customers = 1000

    # Load base data file
    # NOTE: All of this information is publicly available; see the readme for the sources of
    # information that were used
    with open(DATA_DIR / "base_data.json", "r") as file:
        base_data = json.load(file)

    zip_results = base_data["zips"]

    # Create a population-weighted sample of customers
    df = pd.DataFrame(zip_results)
    df["weight"] = df["population"] / df["population"].sum()
    selected_zips = df.sample(n=num_customers, weights="weight", replace=True)

    # Generate customer names
    print("Generating customers data...")
    c_given_names = random.choices(base_data["given_names"], k=num_customers)
    c_surnames = random.choices(base_data["surnames"], k=num_customers)

    # Generate addresses
    print("Generating address data...")
    c_streets = random.choices(base_data["streets"], k=num_customers)

    # For street numbers, we just generate a random number between 1 and 120, weighted such that
    # lower numbers are more likely to occur
    house_numbers = [weighted_random_int(1, 120) for _ in range(num_customers)]

    # Finally, create meter readings
    print("Generating meter readings...")
    readings = generate_readings(num_customers)

    # Create a final list of customers and store as JSON
    print("Creating final JSON file...")
    customers = []
    for i in range(num_customers):
        customers.append(
            {
                "given_name": c_given_names[i],
                "surname": c_surnames[i],
                "street": c_streets[i],
                "house_number": house_numbers[i],
                "city": selected_zips.iloc[i]["city"],
                "zip_code": selected_zips.iloc[i]["zip_codes"],
                "longitude": float(selected_zips.iloc[i]["longitude"]),
                "latitude": float(selected_zips.iloc[i]["latitude"]),
                "readings_elt": readings[i]["electricity"],
                "readings_gas": readings[i]["gas"],
            }
        )

    with open(DATA_DIR / "customers.json", "w") as file:
        json.dump(customers, file, indent=4, ensure_ascii=False, cls=DateTimeEncoder)
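For orientation, one record in the resulting customers.json looks roughly like this (values are hypothetical; the reading tuples serialize as JSON arrays of meter number, ISO dates, and readings):

{
    "given_name": "Anna",
    "surname": "Schmidt",
    "street": "Hauptstraße",
    "house_number": 12,
    "city": "Helmstedt",
    "zip_code": "38350",
    "longitude": 11.01,
    "latitude": 52.23,
    "readings_elt": ["4.321.654.9", ["2023-08-30T00:00:00", "2024-09-05T00:00:00"], [41230, 44790]],
    "readings_gas": ["3.123.456.7", ["2023-09-02T00:00:00", "2024-09-01T00:00:00"], [25110, 26250]]
}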
data_preparation/get_base_data.py | 162 lines, new file
@@ -0,0 +1,162 @@
"""Get base data (Addresses, Names) to form the foundation of the customer generation process."""

import json
from pathlib import Path
from typing import Any, Dict, List, Union

import requests
from config import DATA_DIR


def read_file_to_list(file_path: Union[str, Path]) -> List[str]:
    """Read the contents of a file and return them as a list of strings.

    This function opens the specified file, reads all lines, and returns
    them as a list where each element is a line from the file.

    Parameters
    ----------
    file_path : str or Path
        The path to the file to be read.

    Returns
    -------
    List[str]
        A list containing the lines of the file as strings.
    """
    with open(file_path, "r") as file:
        return file.readlines()


def get_datalist(file_path: Path) -> List[str]:
    """Read a file and return its contents as a list of stripped strings.

    This function reads the contents of the specified file, removes leading
    and trailing whitespace from each line, and returns the resulting lines
    as a list of strings.

    Parameters
    ----------
    file_path : Path
        The path to the file to be read, as a pathlib.Path object.

    Returns
    -------
    List[str]
        A list containing the stripped lines of the file as strings.
    """
    data_list = read_file_to_list(file_path)
    for idx in range(len(data_list)):
        data_list[idx] = data_list[idx].strip()

    return data_list


def select_zips() -> Dict[str, List]:
    """Select and retrieve information about zip codes within the Avacon Netz area.

    This function reads a list of German zip codes, filters them based on
    geographical boundaries approximating the Avacon Netz coverage area,
    and retrieves additional information for each selected zip code using
    a public API.

    The function uses predefined latitude and longitude ranges to determine
    if a zip code falls within the Avacon Netz area.

    Returns
    -------
    Dict[str, List]
        A dictionary containing the following keys, each associated with a list:
        - 'latitude': Latitudes of selected zip codes
        - 'longitude': Longitudes of selected zip codes
        - 'population': Population counts for selected zip codes
        - 'city': City names for selected zip codes
        - 'zip_codes': Selected zip codes

    Notes
    -----
    - This function requires an internet connection to access the API.
    - The function assumes the existence of a global constant DATA_DIR
      pointing to the directory containing the 'zip_codes.txt' file.
    - The API used is 'https://gvz.tuerantuer.org/api/administrative_divisions/'.
    """
    lat_list = []
    lon_list = []
    pop_list = []
    selected_zips = []
    city_list = []

    # Get list of all zip codes in Germany
    zip_list = read_file_to_list(DATA_DIR / "zip_codes.txt")

    # Geographical settings:
    # The main idea is to roughly describe the Avacon Netz area, then use an API to go through all
    # zip codes in Germany and only keep those that are inside the longitude and latitude range.
    # We use the same API to get the actual latitude and longitude for each zip code and the total
    # population so that we can use this information to weight the sampling of customers.

    # Approximate northernmost point of Avacon Netz coverage near Geesthacht to southernmost point
    # in the south Harz mountains close to Ilfeld
    min_lat, max_lat = 51.618147, 53.432608
    # Approximate westernmost point of Avacon Netz coverage near Vechta to easternmost point near
    # Reuden/Anhalt
    min_lon, max_lon = 8.476919, 12.270417

    for zip_code in zip_list:
        zip_code = zip_code.strip()  # readlines() keeps trailing newlines
        if zip_code in selected_zips:
            continue

        # Get additional data by using this public API
        response = requests.get(
            f"https://gvz.tuerantuer.org/api/administrative_divisions/?search={zip_code}"
        )
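        # The response is expected to look roughly like this (illustrative, inferred from the
        # fields accessed below):
        #   {"count": 1, "results": [{"latitude": 52.2, "longitude": 11.0,
        #    "zip_codes": ["38350"], "citizens_total": 15000, "office_city": "Helmstedt"}]}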
        if response.status_code == 200:
            response_json = response.json()
            if response_json["count"] > 0:
                zip_data = response_json["results"][0]
                lat = zip_data["latitude"]
                lon = zip_data["longitude"]

                # Check if the zip code is within the Avacon Netz area
                if (min_lat <= lat <= max_lat) and (min_lon <= lon <= max_lon):
                    num_associated_zips = len(zip_data["zip_codes"])
                    selected_zips += zip_data["zip_codes"]  # Add any associated zip codes
                    lat_list += [lat] * num_associated_zips
                    lon_list += [lon] * num_associated_zips
                    pop_list += [zip_data["citizens_total"]] * num_associated_zips
                    city_list += [zip_data["office_city"]] * num_associated_zips

        else:
            print(f"No data found for {zip_code}")

    result = {
        "latitude": lat_list,
        "longitude": lon_list,
        "population": pop_list,
        "city": city_list,
        "zip_codes": selected_zips,
    }

    return result


if __name__ == "__main__":
    data: Dict[str, Union[list[Any], Dict[str, list[Any]]]] = {}

    # Get given names and surnames
    data["surnames"] = get_datalist(DATA_DIR / "names.txt")
    data["given_names"] = get_datalist(DATA_DIR / "female_gnames.txt") + get_datalist(
        DATA_DIR / "male_gnames.txt"
    )

    # Get list of street names
    data["streets"] = get_datalist(DATA_DIR / "street_names.txt")

    # Get list of all zip codes and select those in the Avacon Netz area.
    # Also include geographical and population data, which is needed for sampling later.
    data["zips"] = select_zips()

    # Save data to file
    with open(DATA_DIR / "base_data.json", "w") as file:
        json.dump(data, file, indent=4, ensure_ascii=False)
data_preparation/init_sql_schema.py | 111 lines, new file
@@ -0,0 +1,111 @@
"""Script to create the SQL schema in Azure SQL Database."""

import os

import pyodbc

schema_creation = """
CREATE TABLE Addresses (
    ID INT PRIMARY KEY IDENTITY(1,1),
    StreetName NVARCHAR(100),
    HouseNumber NVARCHAR(10),
    City NVARCHAR(50),
    PostalCode NVARCHAR(10),
    Longitude FLOAT,
    Latitude FLOAT
);

CREATE TABLE Meters (
    ID INT PRIMARY KEY IDENTITY(1,1),
    Signature NVARCHAR(11),
    MeterType NVARCHAR(3),
    AddressID INT,
    FOREIGN KEY (AddressID) REFERENCES Addresses(ID)
);

CREATE TABLE Customers (
    ID INT PRIMARY KEY IDENTITY(1,1),
    FirstName NVARCHAR(100),
    LastName NVARCHAR(100),
    GasMeterID INT,
    EltMeterID INT,
    FOREIGN KEY (GasMeterID) REFERENCES Meters(ID),
    FOREIGN KEY (EltMeterID) REFERENCES Meters(ID)
);

CREATE TABLE Readings (
    ID INT PRIMARY KEY IDENTITY(1,1),
    CustomerID INT,
    MeterID INT,
    ReadingDate DATE,
    ReadingValue INT,
    FOREIGN KEY (CustomerID) REFERENCES Customers(ID),
    FOREIGN KEY (MeterID) REFERENCES Meters(ID)
);
"""


def create_tables(cursor: pyodbc.Cursor, schema: str) -> None:
    """Create database tables based on the provided schema.

    This function takes a database schema as a string and executes each table
    creation statement separately. It handles cases where tables already exist
    and reports this information.

    Parameters
    ----------
    cursor : pyodbc.Cursor
        A pyodbc cursor object used to execute SQL commands.
    schema : str
        A string containing SQL statements for table creation, separated by
        semicolons.

    Returns
    -------
    None
        This function doesn't return any value.

    Raises
    ------
    pyodbc.ProgrammingError
        If there's an error in executing the SQL statements, other than
        "table already exists".

    Notes
    -----
    - The function assumes the existence of a global 'conn' object for
      committing changes to the database.
    - It prints a message if a table already exists instead of raising an error.
    """
    for table_creation in schema.split(";"):
        if table_creation.strip():
            try:
                cursor.execute(table_creation)
                conn.commit()
            except pyodbc.ProgrammingError as e:
                if "There is already an object named" in str(e):
                    print(f"Table already exists: {table_creation.split('(')[0].split()[-1]}")
                else:
                    raise


if __name__ == "__main__":
    # Connect to the database
    connection_string = os.environ.get("AZURE_SQL_CONNECTION_STRING")

    try:
        conn = pyodbc.connect(connection_string)
        cursor = conn.cursor()
        print("Connected to Azure SQL Database successfully!")

        # Create the schema
        create_tables(cursor, schema_creation)

    except pyodbc.Error as e:
        print(f"Error connecting to Azure SQL Database: {e}")
    finally:
        if "cursor" in locals():
            cursor.close()
        if "conn" in locals():
            conn.close()
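A quick verification sketch, assuming the same AZURE_SQL_CONNECTION_STRING environment variable: list the user tables that should now exist (Addresses, Meters, Customers, Readings).

import os

import pyodbc

conn = pyodbc.connect(os.environ["AZURE_SQL_CONNECTION_STRING"])
cursor = conn.cursor()
# INFORMATION_SCHEMA.TABLES lists all base tables visible to the connected user
cursor.execute("SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_TYPE = 'BASE TABLE';")
for (table_name,) in cursor.fetchall():
    print(table_name)
cursor.close()
conn.close()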
data_preparation/insert_sql.py | 214 lines, new file
@@ -0,0 +1,214 @@
"""Script to insert the preprocessed data into the Azure SQL Database."""

import json
import os
from datetime import datetime
from typing import List, Tuple

import pyodbc
from config import DATA_DIR


def insert_addresses(
    cursor: pyodbc.Cursor, addresses: List[Tuple[str, str, str, str, float, float]]
) -> None:
    """Insert multiple addresses into the Addresses table using pyodbc.

    Parameters
    ----------
    cursor : pyodbc.Cursor
        A pyodbc cursor object used to execute SQL commands.
    addresses : List[Tuple[str, str, str, str, float, float]]
        A list of tuples, where each tuple represents an address with the following elements:
        (StreetName, HouseNumber, City, PostalCode, Longitude, Latitude).

    Returns
    -------
    None
        This function doesn't return any value.
    """
    for address in addresses:
        cursor.execute(
            """
            INSERT INTO Addresses (StreetName, HouseNumber, City, PostalCode, Longitude, Latitude)
            VALUES (?, ?, ?, ?, ?, ?);
            """,
            address,
        )
    conn.commit()


def insert_meters(cursor: pyodbc.Cursor, meter_data: List[Tuple[str, str, int]]) -> None:
    """Insert multiple meters into the Meters table in a database using pyodbc.

    Parameters
    ----------
    cursor : pyodbc.Cursor
        A pyodbc cursor object used to execute SQL commands.
    meter_data : List[Tuple[str, str, int]]
        A list of tuples, where each tuple represents a meter with the following elements:
        (Signature, MeterType, AddressID).

    Returns
    -------
    None
        This function doesn't return any value.
    """
    for meter in meter_data:
        cursor.execute(
            """
            INSERT INTO Meters (Signature, MeterType, AddressID)
            VALUES (?, ?, ?);
            """,
            meter,
        )
    conn.commit()


def insert_customers(cursor: pyodbc.Cursor, customer_data: List[Tuple[str, str, int, int]]) -> None:
    """Insert multiple customers into the Customers table in a database using pyodbc.

    Parameters
    ----------
    cursor : pyodbc.Cursor
        A pyodbc cursor object used to execute SQL commands.
    customer_data : List[Tuple[str, str, int, int]]
        A list of tuples, where each tuple represents a customer with the following elements:
        (FirstName, LastName, GasMeterID, EltMeterID).

    Returns
    -------
    None
        This function doesn't return any value.
    """
    for customer in customer_data:
        cursor.execute(
            """
            INSERT INTO Customers (FirstName, LastName, GasMeterID, EltMeterID)
            VALUES (?, ?, ?, ?);
            """,
            customer,
        )
    conn.commit()


def insert_readings(
    cursor: pyodbc.Cursor, readings_data: List[Tuple[int, int, datetime, int]]
) -> None:
    """Insert multiple readings into the Readings table in a database using pyodbc.

    Parameters
    ----------
    cursor : pyodbc.Cursor
        A pyodbc cursor object used to execute SQL commands.
    readings_data : List[Tuple[int, int, datetime, int]]
        A list of tuples, where each tuple represents a reading with the following elements:
        (CustomerID, MeterID, ReadingDate, ReadingValue).

    Returns
    -------
    None
        This function doesn't return any value.
    """
    for reading in readings_data:
        cursor.execute(
            """
            INSERT INTO Readings (CustomerID, MeterID, ReadingDate, ReadingValue)
            VALUES (?, ?, ?, ?);
            """,
            reading,
        )
    conn.commit()


if __name__ == "__main__":
    conn_str = os.environ.get("AZURE_SQL_CONNECTION_STRING")
    try:
        conn = pyodbc.connect(conn_str)
        cursor = conn.cursor()
        print("Connected to Azure SQL Database successfully!")

    except pyodbc.Error as e:
        print(f"Error connecting to Azure SQL Database: {e}")
        raise SystemExit(1)  # Without a connection, the inserts below cannot run.

    # Load generated customer data
    with open(DATA_DIR / "customers.json", "r") as file:
        customers = json.load(file)

    # Insert data into the database, starting with addresses
    full_addr_list = []
    for c in customers:
        full_addr_list.append(
            (
                c["street"],
                c["house_number"],
                c["city"],
                c["zip_code"],
                c["longitude"],
                c["latitude"],
            )
        )

    unique_addresses = list(set(full_addr_list))

    address_ids = []
    for i in range(len(customers)):
        address_ids.append(unique_addresses.index(full_addr_list[i]) + 1)
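    # NOTE: list.index is a linear scan, so this ID lookup is O(n²) overall; a dict mapping
    # address tuple -> ID would make it linear, but this is fine at the current data size.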

    gas_meter_data = []
    elt_meter_data = []
    gas_readings_data = []
    elt_readings_data = []
    customer_data = []
    gas_id = 1
    elt_id = len(customers) + 1
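    # Meter IDs come from the IDENTITY column in insertion order: gas meters are inserted first
    # and get IDs 1..N, electricity meters follow with IDs N+1..2N.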
    for c_idx, c in enumerate(customers):
        # GasMeterID 1..N, EltMeterID N+1..2N (matching the meter insertion order below)
        customer_data.append(
            (c["given_name"], c["surname"], c_idx + 1, len(customers) + c_idx + 1)
        )
        for r in range(len(c["readings_gas"][1])):
            if r == 0:
                gas_meter_data.append((c["readings_gas"][0], "GAS", address_ids[c_idx]))
                elt_meter_data.append((c["readings_elt"][0], "ELT", address_ids[c_idx]))
            gas_readings_data.append(
                (
                    c_idx + 1,
                    gas_id,
                    datetime.fromisoformat(c["readings_gas"][1][r]),
                    c["readings_gas"][2][r],
                )
            )  # CustomerID, MeterID, ReadingDate, ReadingValue
            elt_readings_data.append(
                (
                    c_idx + 1,
                    elt_id,
                    datetime.fromisoformat(c["readings_elt"][1][r]),
                    c["readings_elt"][2][r],
                )
            )
        gas_id += 1
        elt_id += 1

    print("Inserting addresses into the database...")
    insert_addresses(cursor, unique_addresses)

    print("Inserting gas meter data into the database...")
    insert_meters(cursor, gas_meter_data)

    print("Inserting electricity meter data into the database...")
    insert_meters(cursor, elt_meter_data)

    print("Inserting customer data into the database...")
    insert_customers(cursor, customer_data)

    print("Inserting gas readings into the database...")
    insert_readings(cursor, gas_readings_data)

    print("Inserting electricity readings into the database...")
    insert_readings(cursor, elt_readings_data)

    print("Data successfully inserted into the database!")

    if "cursor" in locals():
        cursor.close()
    if "conn" in locals():
        conn.close()
data_preparation/test_sql_connection.py | 56 lines, new file
@@ -0,0 +1,56 @@
"""Script to test connection to Azure SQL Database."""

import os

import pyodbc


def test_connection() -> bool:
    """Test the connection to Azure SQL Database.

    This function attempts to establish a connection to an Azure SQL Database
    using a connection string stored in the environment variable
    'AZURE_SQL_CONNECTION_STRING'. It executes a simple query to verify
    the connection and prints the result of the connection attempt.

    Returns
    -------
    bool
        True if the connection was successful and closed properly,
        False if there was an error connecting to the database.

    Notes
    -----
    - The function requires the 'AZURE_SQL_CONNECTION_STRING' environment
      variable to be set with a valid connection string.
    - It uses pyodbc to establish the database connection.
    - The function prints success or error messages to stdout.
    - In case of a successful connection, it executes "SELECT @@version;"
      as a test query.
    - The function ensures that both the cursor and the connection are
      closed after the operation, regardless of its success or failure.
    """
    connection_string = os.environ.get("AZURE_SQL_CONNECTION_STRING")
    try:
        conn = pyodbc.connect(connection_string)
        cursor = conn.cursor()
        print("Connected to Azure SQL Database successfully!")

        # Example query
        cursor.execute("SELECT @@version;")

    except pyodbc.Error as e:
        print(f"Error connecting to Azure SQL Database: {e}")
        return False

    finally:
        if "cursor" in locals():
            cursor.close()
        if "conn" in locals():
            conn.close()

    return True


if __name__ == "__main__":
    test_connection()
poetry.lock | 198 lines, generated
@@ -802,6 +802,67 @@ files = [
    {file = "nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f"},
]

[[package]]
name = "numpy"
version = "2.1.0"
description = "Fundamental package for array computing in Python"
optional = false
python-versions = ">=3.10"
files = [
    {file = "numpy-2.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6326ab99b52fafdcdeccf602d6286191a79fe2fda0ae90573c5814cd2b0bc1b8"},
    {file = "numpy-2.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0937e54c09f7a9a68da6889362ddd2ff584c02d015ec92672c099b61555f8911"},
    {file = "numpy-2.1.0-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:30014b234f07b5fec20f4146f69e13cfb1e33ee9a18a1879a0142fbb00d47673"},
    {file = "numpy-2.1.0-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:899da829b362ade41e1e7eccad2cf274035e1cb36ba73034946fccd4afd8606b"},
    {file = "numpy-2.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:08801848a40aea24ce16c2ecde3b756f9ad756586fb2d13210939eb69b023f5b"},
    {file = "numpy-2.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:398049e237d1aae53d82a416dade04defed1a47f87d18d5bd615b6e7d7e41d1f"},
    {file = "numpy-2.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0abb3916a35d9090088a748636b2c06dc9a6542f99cd476979fb156a18192b84"},
    {file = "numpy-2.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:10e2350aea18d04832319aac0f887d5fcec1b36abd485d14f173e3e900b83e33"},
    {file = "numpy-2.1.0-cp310-cp310-win32.whl", hash = "sha256:f6b26e6c3b98adb648243670fddc8cab6ae17473f9dc58c51574af3e64d61211"},
    {file = "numpy-2.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:f505264735ee074250a9c78247ee8618292091d9d1fcc023290e9ac67e8f1afa"},
    {file = "numpy-2.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:76368c788ccb4f4782cf9c842b316140142b4cbf22ff8db82724e82fe1205dce"},
    {file = "numpy-2.1.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:f8e93a01a35be08d31ae33021e5268f157a2d60ebd643cfc15de6ab8e4722eb1"},
    {file = "numpy-2.1.0-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:9523f8b46485db6939bd069b28b642fec86c30909cea90ef550373787f79530e"},
    {file = "numpy-2.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:54139e0eb219f52f60656d163cbe67c31ede51d13236c950145473504fa208cb"},
    {file = "numpy-2.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5ebbf9fbdabed208d4ecd2e1dfd2c0741af2f876e7ae522c2537d404ca895c3"},
    {file = "numpy-2.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:378cb4f24c7d93066ee4103204f73ed046eb88f9ad5bb2275bb9fa0f6a02bd36"},
    {file = "numpy-2.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d8f699a709120b220dfe173f79c73cb2a2cab2c0b88dd59d7b49407d032b8ebd"},
    {file = "numpy-2.1.0-cp311-cp311-win32.whl", hash = "sha256:ffbd6faeb190aaf2b5e9024bac9622d2ee549b7ec89ef3a9373fa35313d44e0e"},
    {file = "numpy-2.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:0af3a5987f59d9c529c022c8c2a64805b339b7ef506509fba7d0556649b9714b"},
    {file = "numpy-2.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:fe76d75b345dc045acdbc006adcb197cc680754afd6c259de60d358d60c93736"},
    {file = "numpy-2.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f358ea9e47eb3c2d6eba121ab512dfff38a88db719c38d1e67349af210bc7529"},
    {file = "numpy-2.1.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:dd94ce596bda40a9618324547cfaaf6650b1a24f5390350142499aa4e34e53d1"},
    {file = "numpy-2.1.0-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:b47c551c6724960479cefd7353656498b86e7232429e3a41ab83be4da1b109e8"},
    {file = "numpy-2.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0756a179afa766ad7cb6f036de622e8a8f16ffdd55aa31f296c870b5679d745"},
    {file = "numpy-2.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24003ba8ff22ea29a8c306e61d316ac74111cebf942afbf692df65509a05f111"},
    {file = "numpy-2.1.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:b34fa5e3b5d6dc7e0a4243fa0f81367027cb6f4a7215a17852979634b5544ee0"},
    {file = "numpy-2.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c4f982715e65036c34897eb598d64aef15150c447be2cfc6643ec7a11af06574"},
    {file = "numpy-2.1.0-cp312-cp312-win32.whl", hash = "sha256:c4cd94dfefbefec3f8b544f61286584292d740e6e9d4677769bc76b8f41deb02"},
    {file = "numpy-2.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:a0cdef204199278f5c461a0bed6ed2e052998276e6d8ab2963d5b5c39a0500bc"},
    {file = "numpy-2.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8ab81ccd753859ab89e67199b9da62c543850f819993761c1e94a75a814ed667"},
    {file = "numpy-2.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:442596f01913656d579309edcd179a2a2f9977d9a14ff41d042475280fc7f34e"},
    {file = "numpy-2.1.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:848c6b5cad9898e4b9ef251b6f934fa34630371f2e916261070a4eb9092ffd33"},
    {file = "numpy-2.1.0-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:54c6a63e9d81efe64bfb7bcb0ec64332a87d0b87575f6009c8ba67ea6374770b"},
    {file = "numpy-2.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:652e92fc409e278abdd61e9505649e3938f6d04ce7ef1953f2ec598a50e7c195"},
    {file = "numpy-2.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ab32eb9170bf8ffcbb14f11613f4a0b108d3ffee0832457c5d4808233ba8977"},
    {file = "numpy-2.1.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:8fb49a0ba4d8f41198ae2d52118b050fd34dace4b8f3fb0ee34e23eb4ae775b1"},
    {file = "numpy-2.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:44e44973262dc3ae79e9063a1284a73e09d01b894b534a769732ccd46c28cc62"},
    {file = "numpy-2.1.0-cp313-cp313-win32.whl", hash = "sha256:ab83adc099ec62e044b1fbb3a05499fa1e99f6d53a1dde102b2d85eff66ed324"},
    {file = "numpy-2.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:de844aaa4815b78f6023832590d77da0e3b6805c644c33ce94a1e449f16d6ab5"},
    {file = "numpy-2.1.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:343e3e152bf5a087511cd325e3b7ecfd5b92d369e80e74c12cd87826e263ec06"},
    {file = "numpy-2.1.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f07fa2f15dabe91259828ce7d71b5ca9e2eb7c8c26baa822c825ce43552f4883"},
    {file = "numpy-2.1.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:5474dad8c86ee9ba9bb776f4b99ef2d41b3b8f4e0d199d4f7304728ed34d0300"},
    {file = "numpy-2.1.0-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:1f817c71683fd1bb5cff1529a1d085a57f02ccd2ebc5cd2c566f9a01118e3b7d"},
    {file = "numpy-2.1.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3a3336fbfa0d38d3deacd3fe7f3d07e13597f29c13abf4d15c3b6dc2291cbbdd"},
    {file = "numpy-2.1.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a894c51fd8c4e834f00ac742abad73fc485df1062f1b875661a3c1e1fb1c2f6"},
    {file = "numpy-2.1.0-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:9156ca1f79fc4acc226696e95bfcc2b486f165a6a59ebe22b2c1f82ab190384a"},
    {file = "numpy-2.1.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:624884b572dff8ca8f60fab591413f077471de64e376b17d291b19f56504b2bb"},
    {file = "numpy-2.1.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:15ef8b2177eeb7e37dd5ef4016f30b7659c57c2c0b57a779f1d537ff33a72c7b"},
    {file = "numpy-2.1.0-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:e5f0642cdf4636198a4990de7a71b693d824c56a757862230454629cf62e323d"},
    {file = "numpy-2.1.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f15976718c004466406342789f31b6673776360f3b1e3c575f25302d7e789575"},
    {file = "numpy-2.1.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:6c1de77ded79fef664d5098a66810d4d27ca0224e9051906e634b3f7ead134c2"},
    {file = "numpy-2.1.0.tar.gz", hash = "sha256:7dc90da0081f7e1da49ec4e398ede6a8e9cc4f5ebe5f9e06b443ed889ee9aaa2"},
]

[[package]]
name = "numpydoc"
version = "1.8.0"
@@ -866,6 +927,79 @@ files = [
dev = ["pytest", "tox"]
lint = ["black"]

[[package]]
name = "pandas"
version = "2.2.2"
description = "Powerful data structures for data analysis, time series, and statistics"
optional = false
python-versions = ">=3.9"
files = [
    {file = "pandas-2.2.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:90c6fca2acf139569e74e8781709dccb6fe25940488755716d1d354d6bc58bce"},
    {file = "pandas-2.2.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c7adfc142dac335d8c1e0dcbd37eb8617eac386596eb9e1a1b77791cf2498238"},
    {file = "pandas-2.2.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4abfe0be0d7221be4f12552995e58723c7422c80a659da13ca382697de830c08"},
    {file = "pandas-2.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8635c16bf3d99040fdf3ca3db669a7250ddf49c55dc4aa8fe0ae0fa8d6dcc1f0"},
    {file = "pandas-2.2.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:40ae1dffb3967a52203105a077415a86044a2bea011b5f321c6aa64b379a3f51"},
    {file = "pandas-2.2.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8e5a0b00e1e56a842f922e7fae8ae4077aee4af0acb5ae3622bd4b4c30aedf99"},
    {file = "pandas-2.2.2-cp310-cp310-win_amd64.whl", hash = "sha256:ddf818e4e6c7c6f4f7c8a12709696d193976b591cc7dc50588d3d1a6b5dc8772"},
    {file = "pandas-2.2.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:696039430f7a562b74fa45f540aca068ea85fa34c244d0deee539cb6d70aa288"},
    {file = "pandas-2.2.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8e90497254aacacbc4ea6ae5e7a8cd75629d6ad2b30025a4a8b09aa4faf55151"},
    {file = "pandas-2.2.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58b84b91b0b9f4bafac2a0ac55002280c094dfc6402402332c0913a59654ab2b"},
    {file = "pandas-2.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d2123dc9ad6a814bcdea0f099885276b31b24f7edf40f6cdbc0912672e22eee"},
    {file = "pandas-2.2.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:2925720037f06e89af896c70bca73459d7e6a4be96f9de79e2d440bd499fe0db"},
    {file = "pandas-2.2.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0cace394b6ea70c01ca1595f839cf193df35d1575986e484ad35c4aeae7266c1"},
    {file = "pandas-2.2.2-cp311-cp311-win_amd64.whl", hash = "sha256:873d13d177501a28b2756375d59816c365e42ed8417b41665f346289adc68d24"},
    {file = "pandas-2.2.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:9dfde2a0ddef507a631dc9dc4af6a9489d5e2e740e226ad426a05cabfbd7c8ef"},
    {file = "pandas-2.2.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e9b79011ff7a0f4b1d6da6a61aa1aa604fb312d6647de5bad20013682d1429ce"},
    {file = "pandas-2.2.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1cb51fe389360f3b5a4d57dbd2848a5f033350336ca3b340d1c53a1fad33bcad"},
    {file = "pandas-2.2.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eee3a87076c0756de40b05c5e9a6069c035ba43e8dd71c379e68cab2c20f16ad"},
    {file = "pandas-2.2.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:3e374f59e440d4ab45ca2fffde54b81ac3834cf5ae2cdfa69c90bc03bde04d76"},
    {file = "pandas-2.2.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:43498c0bdb43d55cb162cdc8c06fac328ccb5d2eabe3cadeb3529ae6f0517c32"},
    {file = "pandas-2.2.2-cp312-cp312-win_amd64.whl", hash = "sha256:d187d355ecec3629624fccb01d104da7d7f391db0311145817525281e2804d23"},
    {file = "pandas-2.2.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0ca6377b8fca51815f382bd0b697a0814c8bda55115678cbc94c30aacbb6eff2"},
    {file = "pandas-2.2.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9057e6aa78a584bc93a13f0a9bf7e753a5e9770a30b4d758b8d5f2a62a9433cd"},
    {file = "pandas-2.2.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:001910ad31abc7bf06f49dcc903755d2f7f3a9186c0c040b827e522e9cef0863"},
    {file = "pandas-2.2.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:66b479b0bd07204e37583c191535505410daa8df638fd8e75ae1b383851fe921"},
    {file = "pandas-2.2.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a77e9d1c386196879aa5eb712e77461aaee433e54c68cf253053a73b7e49c33a"},
    {file = "pandas-2.2.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:92fd6b027924a7e178ac202cfbe25e53368db90d56872d20ffae94b96c7acc57"},
    {file = "pandas-2.2.2-cp39-cp39-win_amd64.whl", hash = "sha256:640cef9aa381b60e296db324337a554aeeb883ead99dc8f6c18e81a93942f5f4"},
    {file = "pandas-2.2.2.tar.gz", hash = "sha256:9e79019aba43cb4fda9e4d983f8e88ca0373adbb697ae9c6c43093218de28b54"},
]

[package.dependencies]
numpy = [
    {version = ">=1.22.4", markers = "python_version < \"3.11\""},
    {version = ">=1.23.2", markers = "python_version == \"3.11\""},
    {version = ">=1.26.0", markers = "python_version >= \"3.12\""},
]
python-dateutil = ">=2.8.2"
pytz = ">=2020.1"
tzdata = ">=2022.7"

[package.extras]
all = ["PyQt5 (>=5.15.9)", "SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)", "beautifulsoup4 (>=4.11.2)", "bottleneck (>=1.3.6)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=2022.12.0)", "fsspec (>=2022.11.0)", "gcsfs (>=2022.11.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.9.2)", "matplotlib (>=3.6.3)", "numba (>=0.56.4)", "numexpr (>=2.8.4)", "odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "pandas-gbq (>=0.19.0)", "psycopg2 (>=2.9.6)", "pyarrow (>=10.0.1)", "pymysql (>=1.0.2)", "pyreadstat (>=1.2.0)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "qtpy (>=2.3.0)", "s3fs (>=2022.11.0)", "scipy (>=1.10.0)", "tables (>=3.8.0)", "tabulate (>=0.9.0)", "xarray (>=2022.12.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)", "zstandard (>=0.19.0)"]
aws = ["s3fs (>=2022.11.0)"]
clipboard = ["PyQt5 (>=5.15.9)", "qtpy (>=2.3.0)"]
compression = ["zstandard (>=0.19.0)"]
computation = ["scipy (>=1.10.0)", "xarray (>=2022.12.0)"]
consortium-standard = ["dataframe-api-compat (>=0.1.7)"]
excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)"]
feather = ["pyarrow (>=10.0.1)"]
fss = ["fsspec (>=2022.11.0)"]
gcp = ["gcsfs (>=2022.11.0)", "pandas-gbq (>=0.19.0)"]
hdf5 = ["tables (>=3.8.0)"]
html = ["beautifulsoup4 (>=4.11.2)", "html5lib (>=1.1)", "lxml (>=4.9.2)"]
mysql = ["SQLAlchemy (>=2.0.0)", "pymysql (>=1.0.2)"]
output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.9.0)"]
parquet = ["pyarrow (>=10.0.1)"]
performance = ["bottleneck (>=1.3.6)", "numba (>=0.56.4)", "numexpr (>=2.8.4)"]
plot = ["matplotlib (>=3.6.3)"]
postgresql = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "psycopg2 (>=2.9.6)"]
pyarrow = ["pyarrow (>=10.0.1)"]
spss = ["pyreadstat (>=1.2.0)"]
sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)"]
test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"]
xml = ["lxml (>=4.9.2)"]

[[package]]
name = "pathspec"
version = "0.12.1"
@@ -995,6 +1129,46 @@ pyyaml = "*"
[package.extras]
extra = ["pygments (>=2.12)"]

[[package]]
name = "pyodbc"
version = "5.1.0"
description = "DB API module for ODBC"
optional = false
python-versions = ">=3.8"
files = [
    {file = "pyodbc-5.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:02fe9821711a2d14415eaeb4deab471d2c8b7034b107e524e414c0e133c42248"},
    {file = "pyodbc-5.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2cbdbd019756285dc44bc35238a3ed8dfaa454e8c8b2c3462f1710cfeebfb290"},
    {file = "pyodbc-5.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:84df3bbce9bafe65abd25788d55c9f1da304f6115d70f25758ff8c85f3ce0517"},
    {file = "pyodbc-5.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:218bb75d4bc67075529a65ce8ec7daeed1d83c33dd7410450fbf68d43d184d28"},
    {file = "pyodbc-5.1.0-cp310-cp310-win32.whl", hash = "sha256:eae576b3b67d21d6f237e18bb5f3df8323a2258f52c3e3afeef79269704072a9"},
    {file = "pyodbc-5.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:c3b65343557f4c7753204e06f4c82c97ed212a636501f4bc27c5ce0e549eb3e8"},
    {file = "pyodbc-5.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:aa6f46377da303bf79bcb4b559899507df4b2559f30dcfdf191358ee4b99f3ab"},
    {file = "pyodbc-5.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b19d7f44cfee89901e482f554a88177e83fae76b03c3f830e0023a195d840220"},
    {file = "pyodbc-5.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c36448322f8d6479d87c528cf52401a6ea4f509b9637750b67340382b4e1b40"},
    {file = "pyodbc-5.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c5e0cb79222aad4b31a3602e39b242683c29c6221a16ed43f45f18fd0b73659"},
    {file = "pyodbc-5.1.0-cp311-cp311-win32.whl", hash = "sha256:92caed9d445815ed3f7e5a1249e29a4600ebc1e99404df81b6ed7671074c9227"},
    {file = "pyodbc-5.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:a1bd14633e91b7a9814f4fd944c9ebb89fb7f1fd4710c4e3999b5ef041536347"},
    {file = "pyodbc-5.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d3d9cc4af703c4817b6e604315910b0cf5dcb68056d52b25ca072dd59c52dcbc"},
    {file = "pyodbc-5.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:406b8fa2133a7b6a713aa5187dba2d08cf763b5884606bed77610a7660fdfabe"},
    {file = "pyodbc-5.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f8488c3818f12207650836c5c6f7352f9ff9f56a05a05512145995e497c0bbb1"},
    {file = "pyodbc-5.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b0df69e3a500791b70b5748c68a79483b24428e4c16027b56aa0305e95c143a4"},
    {file = "pyodbc-5.1.0-cp312-cp312-win32.whl", hash = "sha256:aa4e02d3a9bf819394510b726b25f1566f8b3f0891ca400ad2d4c8b86b535b78"},
    {file = "pyodbc-5.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:33f4984af38872e7bdec78007a34e4d43ae72bf9d0bae3344e79d9d0db157c0e"},
    {file = "pyodbc-5.1.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:29425e2d366e7f5828b76c7993f412a3db4f18bd5bcee00186c00b5a5965e205"},
    {file = "pyodbc-5.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a2bbd2e75c77dee9f3cd100c3246110abaeb9af3f7fa304ccc2934ff9c6a4fa4"},
    {file = "pyodbc-5.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3602136a936bc0c1bb9722eb2fbf2042b3ff1ddccdc4688e514b82d4b831563b"},
    {file = "pyodbc-5.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bed1c843565d3a4fd8c332ebceaf33efe817657a0505eacb97dd1b786a985b0b"},
    {file = "pyodbc-5.1.0-cp38-cp38-win32.whl", hash = "sha256:735f6da3762e5856b5580be0ed96bb946948346ebd1e526d5169a5513626a67a"},
    {file = "pyodbc-5.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:c5bb4e43f6c72f5fa2c634570e0d761767d8ea49f39205229b812fb4d3fe05aa"},
    {file = "pyodbc-5.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:33f0f1d7764cefef6f787936bd6359670828a6086be67518ab951f1f7f503cda"},
    {file = "pyodbc-5.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:be3b1c36c31ec7d73d0b34a8ad8743573763fadd8f2bceef1e84408252b48dce"},
    {file = "pyodbc-5.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e71a51c252b503b4d753e21ed31e640015fc0d00202d42ea42f2396fcc924b4a"},
    {file = "pyodbc-5.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:af5282cc8b667af97d76f4955250619a53f25486cbb6b1f45a06b781006ffa0b"},
    {file = "pyodbc-5.1.0-cp39-cp39-win32.whl", hash = "sha256:96b2a8dc27693a517e3aad3944a7faa8be95d40d7ec1eda51a1885162eedfa33"},
    {file = "pyodbc-5.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:e738c5eedb4a0cbab20cc008882f49b106054499db56864057c2530ff208cf32"},
    {file = "pyodbc-5.1.0.tar.gz", hash = "sha256:397feee44561a6580be08cedbe986436859563f4bb378f48224655c8e987ea60"},
]

[[package]]
name = "pytest"
version = "8.3.2"
@@ -1045,6 +1219,17 @@ files = [
[package.extras]
cli = ["click (>=5.0)"]

[[package]]
name = "pytz"
version = "2024.1"
description = "World timezone definitions, modern and historical"
optional = false
python-versions = "*"
files = [
    {file = "pytz-2024.1-py2.py3-none-any.whl", hash = "sha256:328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319"},
    {file = "pytz-2024.1.tar.gz", hash = "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812"},
]

[[package]]
name = "pyyaml"
version = "6.0.2"
@@ -1480,6 +1665,17 @@ files = [
    {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"},
]

[[package]]
name = "tzdata"
version = "2024.1"
description = "Provider of IANA time zone data"
optional = false
python-versions = ">=2"
files = [
    {file = "tzdata-2024.1-py2.py3-none-any.whl", hash = "sha256:9068bc196136463f5245e51efda838afa15aaeca9903f49050dfa2679db4d252"},
    {file = "tzdata-2024.1.tar.gz", hash = "sha256:2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd"},
]

[[package]]
name = "urllib3"
version = "2.2.2"
@@ -1598,4 +1794,4 @@ type = ["pytest-mypy"]
[metadata]
lock-version = "2.0"
python-versions = "^3.10"
-content-hash = "1ace87aaeab8e9964d3f2e843eb5a0d15c1506bb0d1871dc41a9a38f5ed3a352"
+content-hash = "3f76ad143c2f6e35163d1bc8d123b97b9bf2a96f86914d240e0c7da2226f1d85"

pyproject.toml
@@ -32,6 +32,8 @@ python = "^3.10"
plotly = "^5.23.0"
dash = "^2.17.1"
gunicorn = "^23.0.0"
+pyodbc = "^5.1.0"
+pandas = "^2.2.2"

[tool.poetry.group.docs.dependencies]
mkdocs = "^1.6.0"