python3 -m venv wb_analytics_env
source wb_analytics_env/bin/activate
pip install requests pip install yandexcloud requests clickhouse-connect yc managed-clickhouse cluster create \
--name wb-analytics \
--network-name default \
--host type=clickhouse,zone-id=ru-central1-a,assign-public-ip=true \
--clickhouse-resource-preset b3-c1-m4 \
--clickhouse-disk-size 10 \
--database name=wb_data \
--user name=ch_admin,password=SecurePass123! [
{
"date": "2026-03-04T18:08:31",
"lastChangeDate": "2026-03-06T10:11:07",
"warehouseName": "Москва",
"warehouseType": "Склад продавца",
"countryName": "Россия",
"oblastOkrugName": "Центральный федеральный округ",
"regionName": "Московский",
"supplierArticle": "12345",
"nmId": 98755111,
"barcode": "123453119222",
"category": "Цифровые товары",
"subject": "Подписка",
"brand": "api-master.ru",
"techSize": "0",
"incomeID": 16735451,
"isSupply": false,
"isRealization": true,
"totalPrice": 15000,
"discountPercent": 18,
"spp": 26,
"finishedPrice": 1145,
"priceWithDisc": 1547,
"isCancel": true,
"cancelDate": "2026-03-09T00:00:00",
"sticker": "923912515",
"gNumber": "11113162218572569531",
"srid": "11.rf921fce16823117b0nhj96222382382.3.0"
}
] CREATE TABLE IF NOT EXISTS wb_sales (
date DateTime,
lastChangeDate DateTime,
warehouseName String,
warehouseType String,
countryName String,
oblastOkrugName String,
regionName String,
supplierArticle String,
nmId UInt64,
barcode String,
category String,
subject String,
brand String,
techSize String,
incomeID UInt64,
isSupply UInt8, -- ClickHouse хранит boolean как 0 или 1
isRealization UInt8,
totalPrice Float64,
discountPercent UInt8,
spp UInt8,
finishedPrice Float64,
priceWithDisc Float64,
isCancel UInt8,
cancelDate DateTime,
sticker String,
gNumber String,
srid String
) ENGINE = MergeTree()
ORDER BY (date, nmId); import requests
import psycopg2
from psycopg2.extras import execute_values
# Подключение к PostgreSQL в Yandex Cloud
conn = psycopg2.connect(
host="rc1a-xxxxxx.mdb.yandexcloud.net",
port=6432,
dbname="wb_data",
user="pg_user",
password="SecurePass123!",
target_session_attrs="read-write"
)
cursor = conn.cursor()
# Запрос к WB
url = "https://statistics-api.wildberries.ru/api/v1/supplier/sales?dateFrom=2026-03-01"
headers = {"Authorization": "ВАШ_ТОКЕН"}
wb_data = requests.get(url, headers=headers).json()
if wb_data:
columns = list(wb_data[0].keys())
# Формируем кортежи для быстрой вставки
values = [[row.get(col) for col in columns] for row in wb_data]
insert_query = f"""
INSERT INTO wb_sales ({','.join(columns)})
VALUES %s
ON CONFLICT (srid) DO NOTHING;
"""
execute_values(cursor, insert_query, values)
conn.commit()
print(f"PostgreSQL: загружено {len(values)} строк.")
cursor.close()
conn.close() CREATE TABLE IF NOT EXISTS wb_sales (
date DateTime,
lastChangeDate DateTime,
warehouseName String,
warehouseType String,
countryName String,
oblastOkrugName String,
regionName String,
supplierArticle String,
nmId UInt64,
barcode String,
category String,
subject String,
brand String,
techSize String,
incomeID UInt64,
isSupply UInt8, -- ClickHouse хранит boolean как 0 или 1
isRealization UInt8,
totalPrice Float64,
discountPercent UInt8,
spp UInt8,
finishedPrice Float64,
priceWithDisc Float64,
isCancel UInt8,
cancelDate DateTime,
sticker String,
gNumber String,
srid String
) ENGINE = MergeTree()
ORDER BY (date, nmId); import requests
import clickhouse_connect
# Подключение к ClickHouse
client = clickhouse_connect.get_client(
host='rc1a-xxxxxx.mdb.yandexcloud.net',
port=8443,
username='ch_user',
password='SecurePass123!',
secure=True,
ca_cert='/path/to/YandexInternalRootCA.crt'
)
# Запрос к WB
url = "https://statistics-api.wildberries.ru/api/v1/supplier/sales?dateFrom=2026-03-01"
headers = {"Authorization": "ВАШ_ТОКЕН"}
wb_data = requests.get(url, headers=headers).json()
if wb_data:
columns = list(wb_data[0].keys())
data_to_insert = []
for row in wb_data:
# ClickHouse не принимает пустые даты "0001-01-01"
if row.get('cancelDate') == "0001-01-01T00:00:00":
row['cancelDate'] = "1970-01-01T00:00:00"
data_to_insert.append([row.get(col) for col in columns])
# Массовая вставка
client.insert('wb_sales', data_to_insert, column_names=columns)
print(f"ClickHouse: загружено {len(data_to_insert)} строк.") CREATE TABLE wb_sales (
date Datetime,
lastChangeDate Datetime,
warehouseName Utf8,
warehouseType Utf8,
countryName Utf8,
oblastOkrugName Utf8,
regionName Utf8,
supplierArticle Utf8,
nmId Uint64,
barcode Utf8,
category Utf8,
subject Utf8,
brand Utf8,
techSize Utf8,
incomeID Uint64,
isSupply Bool,
isRealization Bool,
totalPrice Double,
discountPercent Uint32,
spp Uint32,
finishedPrice Double,
priceWithDisc Double,
isCancel Bool,
cancelDate Datetime,
sticker Utf8,
gNumber Utf8,
srid Utf8,
PRIMARY KEY (srid) -- YDB требует явный первичный ключ
); import requests
import ydb
import ydb.iam
from datetime import datetime
# Настройки YDB
endpoint = "grpcs://ydb.serverless.yandexcloud.net:2135"
database = "/ru-central1/b1g.../etn..." # Ваш путь к БД из консоли
# Авторизация (через сервисный аккаунт или IAM-токен)
driver_config = ydb.DriverConfig(
endpoint, database, credentials=ydb.iam.MetadataUrlCredentials()
)
# Запрос к WB
url = "https://statistics-api.wildberries.ru/api/v1/supplier/sales?dateFrom=2026-03-01"
headers = {"Authorization": "ВАШ_ТОКЕН"}
wb_data = requests.get(url, headers=headers).json()
if wb_data:
# Конвертируем строки дат в timestamp для YDB
for row in wb_data:
try:
row['date'] = int(datetime.strptime(row['date'], "%Y-%m-%dT%H:%M:%S").timestamp())
if row.get('cancelDate') and row['cancelDate'] != "0001-01-01T00:00:00":
row['cancelDate'] = int(datetime.strptime(row['cancelDate'], "%Y-%m-%dT%H:%M:%S").timestamp())
else:
row['cancelDate'] = None
except ValueError:
pass
# YDB bulk_upsert принимает список словарей с типами
with ydb.Driver(driver_config) as driver:
driver.wait(timeout=5)
# Описываем типы столбцов для Bulk Upsert
column_types = ydb.BulkUpsertColumns()
column_types.add_column('srid', ydb.OptionalType(ydb.PrimitiveType.Utf8))
column_types.add_column('date', ydb.OptionalType(ydb.PrimitiveType.Datetime))
column_types.add_column('nmId', ydb.OptionalType(ydb.PrimitiveType.Uint64))
# ... (добавьте остальные типы согласно DDL) ...
# Выполняем массовую вставку
driver.table_client.bulk_upsert(f"{database}/wb_sales", wb_data, column_types)
print(f"YDB: загружено {len(wb_data)} строк.")