OpenAI Completions 使用 API 扩展示例
对于我们的大多数用户来说,默认的使用和成本仪表板已经足够。但是,如果您需要更详细的数据或自定义仪表板,可以使用 Completions Usage API。
本 notebook 演示了如何从 OpenAI Completions Usage API 和 Costs API 中检索和可视化使用数据。我们将:
- 调用 API 获取 completions 使用数据。
- 将 JSON 响应解析为 pandas DataFrame。
- 使用 matplotlib 可视化随时间变化的 token 使用情况。
- 按模型进行分组,以分析不同模型的 token 使用情况。
- 使用饼图显示模型分布。
我们还包含了所有可能的 API 参数的占位符,以提供全面的概述。
# 安装所需的库(如果尚未安装)
!pip install requests pandas numpy matplotlib --quiet
# 导入库
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import time
import json
# 在 Jupyter 中进行内联绘图
%matplotlib inline
设置 API 凭据和参数
设置管理员密钥 - https://platform.openai.com/settings/organization/admin-keys
将 'PLACEHOLDER'
替换为您的实际管理员 API 密钥。最佳实践是出于安全原因从环境变量加载密钥。
# Reusable function to retrieve paginated data from the API
def get_data(url, params):
    """Fetch every page of results from an OpenAI admin API endpoint.

    Parameters
    ----------
    url : str
        Full endpoint URL (e.g. the completions usage or costs API).
    params : dict
        Query parameters. Mutated in place to carry the pagination cursor.

    Returns
    -------
    list
        Concatenation of every page's "data" array (empty list on failure).
    """
    import os

    # Prefer the environment variable (best practice per the note above the
    # cell); fall back to the inline placeholder so the notebook still runs.
    OPENAI_ADMIN_KEY = os.environ.get("OPENAI_ADMIN_KEY", "PLACEHOLDER")
    headers = {
        "Authorization": f"Bearer {OPENAI_ADMIN_KEY}",
        "Content-Type": "application/json",
    }
    # Accumulator for all pages of results
    all_data = []
    # Pagination cursor returned by the API ("next_page")
    page_cursor = None
    # Loop until the API stops returning a next-page cursor
    while True:
        if page_cursor:
            params["page"] = page_cursor
        response = requests.get(url, headers=headers, params=params)
        if response.status_code == 200:
            data_json = response.json()
            all_data.extend(data_json.get("data", []))
            page_cursor = data_json.get("next_page")
            if not page_cursor:
                break
        else:
            # Include the response body so auth/parameter errors are debuggable.
            print(f"Error: {response.status_code} {response.text}")
            break
    if all_data:
        print("Data retrieved successfully!")
    else:
        print("Issue: No data available to retrieve.")
    return all_data
# Completions usage API endpoint
url = "https://api.openai.com/v1/organization/usage/completions"

# Window start: n days before now, in Unix seconds
days_ago = 30
start_time = int(time.time()) - days_ago * 24 * 60 * 60

# Query parameters, with placeholders for every supported option
params = {
    "start_time": start_time,  # Required: start time (Unix seconds)
    # "end_time": end_time,  # Optional: end time (Unix seconds)
    "bucket_width": "1d",  # Optional: '1m', '1h' or '1d' (defaults to '1d')
    # "project_ids": ["proj_example"],  # Optional: list of project IDs
    # "user_ids": ["user_example"],  # Optional: list of user IDs
    # "api_key_ids": ["key_example"],  # Optional: list of API key IDs
    # "models": ["o1-2024-12-17", "gpt-4o-2024-08-06", "gpt-4o-mini-2024-07-18"],  # Optional: list of models
    # "batch": False,  # Optional: True for batch jobs, False for non-batch
    # "group_by": ["model"],  # Optional: fields to group by
    "limit": 7,  # Optional: number of buckets to return (splits the data into 7 buckets)
    # "page": "cursor_string"  # Optional: pagination cursor
}

# Fetch all pages of usage data
usage_data = get_data(url, params)
Data retrieved successfully!
查看 JSON 响应
让我们看一下 API 的原始 JSON 响应,以了解其结构。
print(json.dumps(usage_data, indent=2))
[
{
"object": "bucket",
"start_time": 1736616660,
"end_time": 1736640000,
"results": [
{
"object": "organization.usage.completions.result",
"input_tokens": 141201,
"output_tokens": 9756,
"num_model_requests": 470,
"project_id": null,
"user_id": null,
"api_key_id": null,
"model": null,
"batch": null,
"input_cached_tokens": 0,
"input_audio_tokens": 0,
"output_audio_tokens": 0
}
]
},
{
"object": "bucket",
"start_time": 1736640000,
"end_time": 1736726400,
"results": [
{
"object": "organization.usage.completions.result",
"input_tokens": 45949,
"output_tokens": 282,
"num_model_requests": 150,
"project_id": null,
"user_id": null,
"api_key_id": null,
"model": null,
"batch": null,
"input_cached_tokens": 0,
"input_audio_tokens": 0,
"output_audio_tokens": 0
}
]
},
{
"object": "bucket",
"start_time": 1736726400,
"end_time": 1736812800,
"results": [
{
"object": "organization.usage.completions.result",
"input_tokens": 3718360,
"output_tokens": 97756,
"num_model_requests": 3053,
"project_id": null,
"user_id": null,
"api_key_id": null,
"model": null,
"batch": null,
"input_cached_tokens": 76544,
"input_audio_tokens": 5776,
"output_audio_tokens": 3166
}
]
},
{
"object": "bucket",
"start_time": 1736812800,
"end_time": 1736899200,
"results": [
{
"object": "organization.usage.completions.result",
"input_tokens": 52786,
"output_tokens": 38204,
"num_model_requests": 157,
"project_id": null,
"user_id": null,
"api_key_id": null,
"model": null,
"batch": null,
"input_cached_tokens": 5440,
"input_audio_tokens": 4066,
"output_audio_tokens": 1097
}
]
},
{
"object": "bucket",
"start_time": 1736899200,
"end_time": 1736985600,
"results": [
{
"object": "organization.usage.completions.result",
"input_tokens": 35664,
"output_tokens": 1835,
"num_model_requests": 55,
"project_id": null,
"user_id": null,
"api_key_id": null,
"model": null,
"batch": null,
"input_cached_tokens": 192,
"input_audio_tokens": 2520,
"output_audio_tokens": 1549
}
]
},
{
"object": "bucket",
"start_time": 1736985600,
"end_time": 1737072000,
"results": [
{
"object": "organization.usage.completions.result",
"input_tokens": 5464,
"output_tokens": 2667,
"num_model_requests": 8,
"project_id": null,
"user_id": null,
"api_key_id": null,
"model": null,
"batch": null,
"input_cached_tokens": 0,
"input_audio_tokens": 0,
"output_audio_tokens": 0
}
]
},
{
"object": "bucket",
"start_time": 1737072000,
"end_time": 1737158400,
"results": [
{
"object": "organization.usage.completions.result",
"input_tokens": 3390547,
"output_tokens": 38604,
"num_model_requests": 2687,
"project_id": null,
"user_id": null,
"api_key_id": null,
"model": null,
"batch": null,
"input_cached_tokens": 25344,
"input_audio_tokens": 0,
"output_audio_tokens": 0
}
]
},
{
"object": "bucket",
"start_time": 1737158400,
"end_time": 1737244800,
"results": [
{
"object": "organization.usage.completions.result",
"input_tokens": 8117824,
"output_tokens": 105662,
"num_model_requests": 6335,
"project_id": null,
"user_id": null,
"api_key_id": null,
"model": null,
"batch": null,
"input_cached_tokens": 46464,
"input_audio_tokens": 0,
"output_audio_tokens": 0
}
]
},
{
"object": "bucket",
"start_time": 1737244800,
"end_time": 1737331200,
"results": [
{
"object": "organization.usage.completions.result",
"input_tokens": 13542,
"output_tokens": 85,
"num_model_requests": 46,
"project_id": null,
"user_id": null,
"api_key_id": null,
"model": null,
"batch": null,
"input_cached_tokens": 0,
"input_audio_tokens": 0,
"output_audio_tokens": 0
}
]
},
{
"object": "bucket",
"start_time": 1737331200,
"end_time": 1737417600,
"results": [
{
"object": "organization.usage.completions.result",
"input_tokens": 0,
"output_tokens": 0,
"num_model_requests": 0,
"project_id": null,
"user_id": null,
"api_key_id": null,
"model": null,
"batch": null,
"input_cached_tokens": 0,
"input_audio_tokens": 0,
"output_audio_tokens": 0
}
]
},
{
"object": "bucket",
"start_time": 1737417600,
"end_time": 1737504000,
"results": [
{
"object": "organization.usage.completions.result",
"input_tokens": 29806,
"output_tokens": 57604,
"num_model_requests": 98,
"project_id": null,
"user_id": null,
"api_key_id": null,
"model": null,
"batch": null,
"input_cached_tokens": 0,
"input_audio_tokens": 0,
"output_audio_tokens": 0
}
]
},
{
"object": "bucket",
"start_time": 1737504000,
"end_time": 1737590400,
"results": [
{
"object": "organization.usage.completions.result",
"input_tokens": 1823,
"output_tokens": 1467,
"num_model_requests": 7,
"project_id": null,
"user_id": null,
"api_key_id": null,
"model": null,
"batch": null,
"input_cached_tokens": 0,
"input_audio_tokens": 0,
"output_audio_tokens": 0
}
]
},
{
"object": "bucket",
"start_time": 1737590400,
"end_time": 1737676800,
"results": [
{
"object": "organization.usage.completions.result",
"input_tokens": 7126,
"output_tokens": 1896,
"num_model_requests": 19,
"project_id": null,
"user_id": null,
"api_key_id": null,
"model": null,
"batch": null,
"input_cached_tokens": 0,
"input_audio_tokens": 0,
"output_audio_tokens": 0
}
]
},
{
"object": "bucket",
"start_time": 1737676800,
"end_time": 1737763200,
"results": [
{
"object": "organization.usage.completions.result",
"input_tokens": 22187,
"output_tokens": 822,
"num_model_requests": 75,
"project_id": null,
"user_id": null,
"api_key_id": null,
"model": null,
"batch": null,
"input_cached_tokens": 640,
"input_audio_tokens": 2557,
"output_audio_tokens": 3103
}
]
},
{
"object": "bucket",
"start_time": 1737763200,
"end_time": 1737849600,
"results": [
{
"object": "organization.usage.completions.result",
"input_tokens": 30204,
"output_tokens": 65673,
"num_model_requests": 99,
"project_id": null,
"user_id": null,
"api_key_id": null,
"model": null,
"batch": null,
"input_cached_tokens": 0,
"input_audio_tokens": 0,
"output_audio_tokens": 0
}
]
},
{
"object": "bucket",
"start_time": 1737849600,
"end_time": 1737936000,
"results": [
{
"object": "organization.usage.completions.result",
"input_tokens": 0,
"output_tokens": 0,
"num_model_requests": 0,
"project_id": null,
"user_id": null,
"api_key_id": null,
"model": null,
"batch": null,
"input_cached_tokens": 0,
"input_audio_tokens": 0,
"output_audio_tokens": 0
}
]
},
{
"object": "bucket",
"start_time": 1737936000,
"end_time": 1738022400,
"results": [
{
"object": "organization.usage.completions.result",
"input_tokens": 0,
"output_tokens": 0,
"num_model_requests": 0,
"project_id": null,
"user_id": null,
"api_key_id": null,
"model": null,
"batch": null,
"input_cached_tokens": 0,
"input_audio_tokens": 0,
"output_audio_tokens": 0
}
]
},
{
"object": "bucket",
"start_time": 1738022400,
"end_time": 1738108800,
"results": [
{
"object": "organization.usage.completions.result",
"input_tokens": 2541,
"output_tokens": 1604,
"num_model_requests": 14,
"project_id": null,
"user_id": null,
"api_key_id": null,
"model": null,
"batch": null,
"input_cached_tokens": 0,
"input_audio_tokens": 0,
"output_audio_tokens": 0
}
]
},
{
"object": "bucket",
"start_time": 1738108800,
"end_time": 1738195200,
"results": [
{
"object": "organization.usage.completions.result",
"input_tokens": 68339,
"output_tokens": 49525,
"num_model_requests": 217,
"project_id": null,
"user_id": null,
"api_key_id": null,
"model": null,
"batch": null,
"input_cached_tokens": 7296,
"input_audio_tokens": 20033,
"output_audio_tokens": 3168
}
]
},
{
"object": "bucket",
"start_time": 1738195200,
"end_time": 1738281600,
"results": [
{
"object": "organization.usage.completions.result",
"input_tokens": 18481,
"output_tokens": 17500,
"num_model_requests": 84,
"project_id": null,
"user_id": null,
"api_key_id": null,
"model": null,
"batch": null,
"input_cached_tokens": 2944,
"input_audio_tokens": 10076,
"output_audio_tokens": 4966
}
]
},
{
"object": "bucket",
"start_time": 1738281600,
"end_time": 1738368000,
"results": [
{
"object": "organization.usage.completions.result",
"input_tokens": 1187894,
"output_tokens": 139134,
"num_model_requests": 5528,
"project_id": null,
"user_id": null,
"api_key_id": null,
"model": null,
"batch": null,
"input_cached_tokens": 2112,
"input_audio_tokens": 4935,
"output_audio_tokens": 993
}
]
},
{
"object": "bucket",
"start_time": 1738368000,
"end_time": 1738454400,
"results": [
{
"object": "organization.usage.completions.result",
"input_tokens": 0,
"output_tokens": 0,
"num_model_requests": 0,
"project_id": null,
"user_id": null,
"api_key_id": null,
"model": null,
"batch": null,
"input_cached_tokens": 0,
"input_audio_tokens": 0,
"output_audio_tokens": 0
}
]
},
{
"object": "bucket",
"start_time": 1738454400,
"end_time": 1738540800,
"results": [
{
"object": "organization.usage.completions.result",
"input_tokens": 7268,
"output_tokens": 30563,
"num_model_requests": 24,
"project_id": null,
"user_id": null,
"api_key_id": null,
"model": null,
"batch": null,
"input_cached_tokens": 0,
"input_audio_tokens": 0,
"output_audio_tokens": 0
}
]
},
{
"object": "bucket",
"start_time": 1738540800,
"end_time": 1738627200,
"results": [
{
"object": "organization.usage.completions.result",
"input_tokens": 15121,
"output_tokens": 22866,
"num_model_requests": 48,
"project_id": null,
"user_id": null,
"api_key_id": null,
"model": null,
"batch": null,
"input_cached_tokens": 0,
"input_audio_tokens": 0,
"output_audio_tokens": 0
}
]
},
{
"object": "bucket",
"start_time": 1738627200,
"end_time": 1738713600,
"results": [
{
"object": "organization.usage.completions.result",
"input_tokens": 16735,
"output_tokens": 16177,
"num_model_requests": 50,
"project_id": null,
"user_id": null,
"api_key_id": null,
"model": null,
"batch": null,
"input_cached_tokens": 1152,
"input_audio_tokens": 0,
"output_audio_tokens": 0
}
]
},
{
"object": "bucket",
"start_time": 1738713600,
"end_time": 1738800000,
"results": [
{
"object": "organization.usage.completions.result",
"input_tokens": 6573,
"output_tokens": 4238,
"num_model_requests": 43,
"project_id": null,
"user_id": null,
"api_key_id": null,
"model": null,
"batch": null,
"input_cached_tokens": 0,
"input_audio_tokens": 0,
"output_audio_tokens": 0
}
]
},
{
"object": "bucket",
"start_time": 1738800000,
"end_time": 1738886400,
"results": [
{
"object": "organization.usage.completions.result",
"input_tokens": 1402,
"output_tokens": 2042,
"num_model_requests": 18,
"project_id": null,
"user_id": null,
"api_key_id": null,
"model": null,
"batch": null,
"input_cached_tokens": 0,
"input_audio_tokens": 0,
"output_audio_tokens": 0
}
]
},
{
"object": "bucket",
"start_time": 1738886400,
"end_time": 1738972800,
"results": [
{
"object": "organization.usage.completions.result",
"input_tokens": 11847,
"output_tokens": 21938,
"num_model_requests": 47,
"project_id": null,
"user_id": null,
"api_key_id": null,
"model": null,
"batch": null,
"input_cached_tokens": 0,
"input_audio_tokens": 0,
"output_audio_tokens": 0
}
]
},
{
"object": "bucket",
"start_time": 1738972800,
"end_time": 1739059200,
"results": [
{
"object": "organization.usage.completions.result",
"input_tokens": 1993,
"output_tokens": 12,
"num_model_requests": 7,
"project_id": null,
"user_id": null,
"api_key_id": null,
"model": null,
"batch": null,
"input_cached_tokens": 0,
"input_audio_tokens": 0,
"output_audio_tokens": 0
}
]
},
{
"object": "bucket",
"start_time": 1739059200,
"end_time": 1739145600,
"results": [
{
"object": "organization.usage.completions.result",
"input_tokens": 0,
"output_tokens": 0,
"num_model_requests": 0,
"project_id": null,
"user_id": null,
"api_key_id": null,
"model": null,
"batch": null,
"input_cached_tokens": 0,
"input_audio_tokens": 0,
"output_audio_tokens": 0
}
]
},
{
"object": "bucket",
"start_time": 1739145600,
"end_time": 1739232000,
"results": [
{
"object": "organization.usage.completions.result",
"input_tokens": 332,
"output_tokens": 1509,
"num_model_requests": 8,
"project_id": null,
"user_id": null,
"api_key_id": null,
"model": null,
"batch": null,
"input_cached_tokens": 0,
"input_audio_tokens": 0,
"output_audio_tokens": 0
}
]
}
]
解析 API 响应并创建 DataFrame
现在我们将解析 JSON 数据,提取相关字段,并创建一个 pandas DataFrame 以便进行更轻松的操作和分析。
# Flatten the bucketed usage JSON: one row per (bucket, result) pair
records = [
    {
        "start_time": bucket.get("start_time"),
        "end_time": bucket.get("end_time"),
        "input_tokens": result.get("input_tokens", 0),
        "output_tokens": result.get("output_tokens", 0),
        "input_cached_tokens": result.get("input_cached_tokens", 0),
        "input_audio_tokens": result.get("input_audio_tokens", 0),
        "output_audio_tokens": result.get("output_audio_tokens", 0),
        "num_model_requests": result.get("num_model_requests", 0),
        "project_id": result.get("project_id"),
        "user_id": result.get("user_id"),
        "api_key_id": result.get("api_key_id"),
        "model": result.get("model"),
        "batch": result.get("batch"),
    }
    for bucket in usage_data
    for result in bucket.get("results", [])
]

# Build the DataFrame from the flattened records
df = pd.DataFrame(records)

# Human-readable datetimes alongside the raw Unix timestamps
df["start_datetime"] = pd.to_datetime(df["start_time"], unit="s")
df["end_datetime"] = pd.to_datetime(df["end_time"], unit="s")

# Reorder columns: datetime columns first for easier reading
df = df[
    [
        "start_datetime",
        "end_datetime",
        "start_time",
        "end_time",
        "input_tokens",
        "output_tokens",
        "input_cached_tokens",
        "input_audio_tokens",
        "output_audio_tokens",
        "num_model_requests",
        "project_id",
        "user_id",
        "api_key_id",
        "model",
        "batch",
    ]
]

# Show the first rows
df.head()
start_datetime | end_datetime | start_time | end_time | input_tokens | output_tokens | input_cached_tokens | input_audio_tokens | output_audio_tokens | num_model_requests | project_id | user_id | api_key_id | model | batch | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2025-01-11 17:31:00 | 2025-01-12 | 1736616660 | 1736640000 | 141201 | 9756 | 0 | 0 | 0 | 470 | None | None | None | None | None |
1 | 2025-01-12 00:00:00 | 2025-01-13 | 1736640000 | 1736726400 | 45949 | 282 | 0 | 0 | 0 | 150 | None | None | None | None | None |
2 | 2025-01-13 00:00:00 | 2025-01-14 | 1736726400 | 1736812800 | 3718360 | 97756 | 76544 | 5776 | 3166 | 3053 | None | None | None | None | None |
3 | 2025-01-14 00:00:00 | 2025-01-15 | 1736812800 | 1736899200 | 52786 | 38204 | 5440 | 4066 | 1097 | 157 | None | None | None | None | None |
4 | 2025-01-15 00:00:00 | 2025-01-16 | 1736899200 | 1736985600 | 35664 | 1835 | 192 | 2520 | 1549 | 55 | None | None | None | None | None |
可视化随时间变化的 Token 使用情况
我们将创建一个条形图来可视化每个时间段的输入和输出 token 使用情况。
if df.empty:
    print("No data available to plot.")
else:
    plt.figure(figsize=(12, 6))
    # Side-by-side bars for input vs output tokens per bucket
    bar_width = 0.35
    positions = np.arange(len(df))
    plt.bar(positions, df["input_tokens"], width=bar_width, label="Input Tokens", alpha=0.7)
    plt.bar(
        positions + bar_width,
        df["output_tokens"],
        width=bar_width,
        label="Output Tokens",
        alpha=0.7,
    )
    # Axis labels, title and date tick labels centered between the bar pairs
    plt.xlabel("Time Bucket")
    plt.ylabel("Number of Tokens")
    plt.title("Daily Input vs Output Token Usage Last 30 Days")
    plt.xticks(
        positions + bar_width / 2,
        [dt.strftime("%Y-%m-%d") for dt in df["start_datetime"]],
        rotation=45,
    )
    plt.legend()
    plt.tight_layout()
    plt.show()
可视化示例:按模型分组
在本节中,我们将检索并可视化按模型和 project_id 分组的使用数据。这可以帮助您查看在指定期间内每个模型和项目的使用情况(例如 token 数和模型请求数)。
关于 grouping 参数的说明
-
如果不指定
group_by
参数,project_id
、model
等字段将返回null
。 尽管group_by
参数是可选的,但在大多数情况下建议包含它以检索有意义的数据。 -
您可以通过用逗号分隔来指定多个分组字段。例如:
group_by=["model", "project_id"]
。
# Window start: n days before now, in Unix seconds
days_ago = 30
start_time = int(time.time()) - days_ago * 24 * 60 * 60

# Request usage bucketed daily, grouped by model and project_id
params = {
    "start_time": start_time,  # Required: start time (Unix seconds)
    "bucket_width": "1d",  # Optional: '1m', '1h' or '1d' (defaults to '1d')
    "group_by": ["model", "project_id"],  # Group data by model and project_id
    "limit": 7,  # Optional: number of buckets to return
}

# Fetch all pages of grouped usage data
all_group_data = get_data(url, params)

# One row per (bucket, result) pair; grouped fields default to "N/A"
records = [
    {
        "start_time": bucket.get("start_time"),
        "end_time": bucket.get("end_time"),
        "input_tokens": result.get("input_tokens", 0),
        "output_tokens": result.get("output_tokens", 0),
        "input_cached_tokens": result.get("input_cached_tokens", 0),
        "input_audio_tokens": result.get("input_audio_tokens", 0),
        "output_audio_tokens": result.get("output_audio_tokens", 0),
        "num_model_requests": result.get("num_model_requests", 0),
        "project_id": result.get("project_id", "N/A"),
        "user_id": result.get("user_id", "N/A"),
        "api_key_id": result.get("api_key_id", "N/A"),
        "model": result.get("model", "N/A"),
        "batch": result.get("batch", "N/A"),
    }
    for bucket in all_group_data
    for result in bucket.get("results", [])
]

# Build the DataFrame from the flattened records
df = pd.DataFrame(records)

# Human-readable datetimes alongside the raw Unix timestamps
df["start_datetime"] = pd.to_datetime(df["start_time"], unit="s", errors="coerce")
df["end_datetime"] = pd.to_datetime(df["end_time"], unit="s", errors="coerce")

# Reorder columns: datetime columns first for easier reading
df = df[
    [
        "start_datetime",
        "end_datetime",
        "start_time",
        "end_time",
        "input_tokens",
        "output_tokens",
        "input_cached_tokens",
        "input_audio_tokens",
        "output_audio_tokens",
        "num_model_requests",
        "project_id",
        "user_id",
        "api_key_id",
        "model",
        "batch",
    ]
]

# Show the first rows
df.head()
Data retrieved successfully!
start_datetime | end_datetime | start_time | end_time | input_tokens | output_tokens | input_cached_tokens | input_audio_tokens | output_audio_tokens | num_model_requests | project_id | user_id | api_key_id | model | batch | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2025-01-11 17:31:39 | 2025-01-12 | 1736616699 | 1736640000 | 6897 | 97 | 0 | 0 | 0 | 97 | proj_hNhhQzyYu7HxySZWs7cA3Ugu | None | None | gpt-4o-mini-2024-07-18 | None |
1 | 2025-01-11 17:31:39 | 2025-01-12 | 1736616699 | 1736640000 | 33984 | 206 | 0 | 0 | 0 | 95 | proj_hNhhQzyYu7HxySZWs7cA3Ugu | None | None | ft:gpt-4o-2024-08-06:distillation-test:wordle2... | None |
2 | 2025-01-11 17:31:39 | 2025-01-12 | 1736616699 | 1736640000 | 2846 | 8874 | 0 | 0 | 0 | 8 | proj_hNhhQzyYu7HxySZWs7cA3Ugu | None | None | o1-mini-2024-09-12 | None |
3 | 2025-01-11 17:31:39 | 2025-01-12 | 1736616699 | 1736640000 | 97474 | 579 | 0 | 0 | 0 | 270 | proj_hNhhQzyYu7HxySZWs7cA3Ugu | None | None | gpt-4o-2024-08-06 | None |
4 | 2025-01-12 00:00:00 | 2025-01-13 | 1736640000 | 1736726400 | 1989 | 28 | 0 | 0 | 0 | 28 | proj_hNhhQzyYu7HxySZWs7cA3Ugu | None | None | gpt-4o-mini-2024-07-18 | None |
将 API 响应解析为 DataFrame 并渲染堆叠条形图
现在我们将解析 JSON 数据,提取相关字段,并创建一个 pandas DataFrame 以便进行更轻松的操作和分析。
# Sum model request counts per (model, project_id) pair
requests_by_model_project = (
    df.groupby(["model", "project_id"])
    .agg({"num_model_requests": "sum"})
    .reset_index()
)

# Unique models (x-axis) and projects (stack colors)
unique_models = sorted(requests_by_model_project["model"].unique())
unique_projects = sorted(requests_by_model_project["project_id"].unique())

palette = [
    "#1f77b4",
    "#ff7f0e",
    "#2ca02c",
    "#d62728",
    "#9467bd",
    "#8c564b",
    "#e377c2",
    "#7f7f7f",
    "#bcbd22",
    "#17becf",
]
color_for_project = {
    pid: palette[idx % len(palette)] for idx, pid in enumerate(unique_projects)
}

# Per-project totals, largest first, for the legend
totals_per_project = (
    requests_by_model_project.groupby("project_id")["num_model_requests"]
    .sum()
    .sort_values(ascending=False)
)

# Bar layout
bar_width = 0.6
positions = np.arange(len(unique_models))

plt.figure(figsize=(12, 6))

# One stacked bar per model, segmented by project
for pos, model_name in zip(positions, unique_models):
    segments = requests_by_model_project[
        requests_by_model_project["model"] == model_name
    ]
    stack_base = 0
    for _, segment in segments.iterrows():
        plt.bar(
            pos,
            segment["num_model_requests"],
            width=bar_width,
            bottom=stack_base,
            color=color_for_project[segment["project_id"]],
        )
        stack_base += segment["num_model_requests"]

# Labels and styling
plt.xlabel("Model")
plt.ylabel("Number of Model Requests")
plt.title("Total Model Requests by Model and Project ID Last 30 Days")
plt.xticks(positions, unique_models, rotation=45, ha="right")

# Legend entries sorted by total request count
legend_patches = [
    mpatches.Patch(color=color_for_project[pid], label=f"{pid} (Total: {total})")
    for pid, total in totals_per_project.items()
]
plt.legend(handles=legend_patches, bbox_to_anchor=(1.05, 1), loc="upper left")
plt.tight_layout()
plt.show()
可视化示例:按项目分布饼图
本节使用饼图可视化模型请求数在不同项目(project_id)之间的分布。
# Collect request counts per project for the pie charts
records = []
for bucket in all_group_data:
    for result in bucket.get("results", []):
        records.append(
            {
                "project_id": result.get("project_id", "N/A"),
                "num_model_requests": result.get("num_model_requests", 0),
            }
        )
# Build the DataFrame
df = pd.DataFrame(records)
# Aggregate request counts by project_id
grouped_by_project = (
    df.groupby("project_id").agg({"num_model_requests": "sum"}).reset_index()
)
# Render the pie charts
if not grouped_by_project.empty:
    # Drop projects with zero requests. .copy() makes this an independent
    # DataFrame so adding the "percentage" column below does not trigger
    # pandas' SettingWithCopyWarning (and works under copy-on-write).
    filtered_grouped_by_project = grouped_by_project[
        grouped_by_project["num_model_requests"] > 0
    ].copy()
    # Total requests across the remaining projects
    total_requests = filtered_grouped_by_project["num_model_requests"].sum()
    if total_requests > 0:
        # Each project's share of the total, in percent
        filtered_grouped_by_project["percentage"] = (
            filtered_grouped_by_project["num_model_requests"] / total_requests
        ) * 100
        # Projects below 5% are collapsed into a single "Other" slice
        other_projects = filtered_grouped_by_project[
            filtered_grouped_by_project["percentage"] < 5
        ]
        main_projects = filtered_grouped_by_project[
            filtered_grouped_by_project["percentage"] >= 5
        ]
        # Aggregate the "Other" slice
        if not other_projects.empty:
            other_row = pd.DataFrame(
                {
                    "project_id": ["Other"],
                    "num_model_requests": [other_projects["num_model_requests"].sum()],
                    "percentage": [other_projects["percentage"].sum()],
                }
            )
            filtered_grouped_by_project = pd.concat(
                [main_projects, other_row], ignore_index=True
            )
        # Largest slices first so labels read naturally
        filtered_grouped_by_project = filtered_grouped_by_project.sort_values(
            by="num_model_requests", ascending=False
        )
        # Main pie chart: model requests per project_id
        plt.figure(figsize=(10, 8))
        plt.pie(
            filtered_grouped_by_project["num_model_requests"],
            labels=filtered_grouped_by_project["project_id"],
            autopct=lambda p: f"{p:.1f}%\n({int(p * total_requests / 100):,})",
            startangle=140,
            textprops={"fontsize": 10},
        )
        plt.title("Distribution of Model Requests by Project ID", fontsize=14)
        plt.axis("equal")  # Equal aspect ratio keeps the pie circular.
        plt.tight_layout()
        plt.show()
        # Secondary pie chart: breakdown of the "Other" slice, if any
        if not other_projects.empty:
            other_total_requests = other_projects["num_model_requests"].sum()
            plt.figure(figsize=(10, 8))
            plt.pie(
                other_projects["num_model_requests"],
                labels=other_projects["project_id"],
                autopct=lambda p: f"{p:.1f}%\n({int(p * other_total_requests / 100):,})",
                startangle=140,
                textprops={"fontsize": 10},
            )
            plt.title('Breakdown of "Other" Projects by Model Requests', fontsize=14)
            plt.axis("equal")  # Equal aspect ratio keeps the pie circular.
            plt.tight_layout()
            plt.show()
    else:
        print("Total model requests is zero. Pie chart will not be rendered.")
else:
    print("No grouped data available for pie chart.")
Costs API 示例
在本节中,我们将使用 OpenAI Costs API 来检索和可视化成本数据。与 completions 数据类似,我们将:
- 调用 Costs API 获取汇总的成本数据。
- 将 JSON 响应解析为 pandas DataFrame。
- 使用条形图可视化按行项目分组的成本。
# Costs API window start: n days before now, in Unix seconds
days_ago = 30
start_time = int(time.time()) - days_ago * 24 * 60 * 60

# Organization costs endpoint
costs_url = "https://api.openai.com/v1/organization/costs"

# Query parameters for the Costs API
costs_params = {
    "start_time": start_time,  # Required: start time (Unix seconds)
    "bucket_width": "1d",  # Optional: currently only '1d' is supported
    "limit": 30,  # Optional: number of buckets to return
}

# Fetch all pages of cost data
all_costs_data = get_data(costs_url, costs_params)
Data retrieved successfully!
print(json.dumps(all_costs_data, indent=2))
[
{
"object": "bucket",
"start_time": 1736553600,
"end_time": 1736640000,
"results": [
{
"object": "organization.costs.result",
"amount": {
"value": 0.13080438340307526,
"currency": "usd"
},
"line_item": null,
"project_id": null,
"organization_id": "org-GLHrIv00VVN9dEQC2b4wsBkf"
}
]
},
{
"object": "bucket",
"start_time": 1736640000,
"end_time": 1736726400,
"results": [
{
"object": "organization.costs.result",
"amount": {
"value": 0.12270423340307525,
"currency": "usd"
},
"line_item": null,
"project_id": null,
"organization_id": "org-GLHrIv00VVN9dEQC2b4wsBkf"
}
]
},
{
"object": "bucket",
"start_time": 1736726400,
"end_time": 1736812800,
"results": [
{
"object": "organization.costs.result",
"amount": {
"value": 9.888144383403077,
"currency": "usd"
},
"line_item": null,
"project_id": null,
"organization_id": "org-GLHrIv00VVN9dEQC2b4wsBkf"
}
]
},
{
"object": "bucket",
"start_time": 1736812800,
"end_time": 1736899200,
"results": [
{
"object": "organization.costs.result",
"amount": {
"value": 0.3507639334030752,
"currency": "usd"
},
"line_item": null,
"project_id": null,
"organization_id": "org-GLHrIv00VVN9dEQC2b4wsBkf"
}
]
},
{
"object": "bucket",
"start_time": 1736899200,
"end_time": 1736985600,
"results": [
{
"object": "organization.costs.result",
"amount": {
"value": 0.2977481185324674,
"currency": "usd"
},
"line_item": null,
"project_id": null,
"organization_id": "org-GLHrIv00VVN9dEQC2b4wsBkf"
}
]
},
{
"object": "bucket",
"start_time": 1736985600,
"end_time": 1737072000,
"results": [
{
"object": "organization.costs.result",
"amount": {
"value": 0.00925485477848094,
"currency": "usd"
},
"line_item": null,
"project_id": null,
"organization_id": "org-GLHrIv00VVN9dEQC2b4wsBkf"
}
]
},
{
"object": "bucket",
"start_time": 1737072000,
"end_time": 1737158400,
"results": [
{
"object": "organization.costs.result",
"amount": {
"value": 8.889884136532304,
"currency": "usd"
},
"line_item": null,
"project_id": null,
"organization_id": "org-GLHrIv00VVN9dEQC2b4wsBkf"
}
]
},
{
"object": "bucket",
"start_time": 1737158400,
"end_time": 1737244800,
"results": [
{
"object": "organization.costs.result",
"amount": {
"value": 21.167310118127915,
"currency": "usd"
},
"line_item": null,
"project_id": null,
"organization_id": "org-GLHrIv00VVN9dEQC2b4wsBkf"
}
]
},
{
"object": "bucket",
"start_time": 1737244800,
"end_time": 1737331200,
"results": [
{
"object": "organization.costs.result",
"amount": {
"value": 0.04955636812791847,
"currency": "usd"
},
"line_item": null,
"project_id": null,
"organization_id": "org-GLHrIv00VVN9dEQC2b4wsBkf"
}
]
},
{
"object": "bucket",
"start_time": 1737331200,
"end_time": 1737417600,
"results": [
{
"object": "organization.costs.result",
"amount": {
"value": 0.0003226181279184669,
"currency": "usd"
},
"line_item": null,
"project_id": null,
"organization_id": "org-GLHrIv00VVN9dEQC2b4wsBkf"
}
]
},
{
"object": "bucket",
"start_time": 1737417600,
"end_time": 1737504000,
"results": [
{
"object": "organization.costs.result",
"amount": {
"value": 0.6320363681279185,
"currency": "usd"
},
"line_item": null,
"project_id": null,
"organization_id": "org-GLHrIv00VVN9dEQC2b4wsBkf"
}
]
},
{
"object": "bucket",
"start_time": 1737504000,
"end_time": 1737590400,
"results": [
{
"object": "organization.costs.result",
"amount": {
"value": 52.41558761812793,
"currency": "usd"
},
"line_item": null,
"project_id": null,
"organization_id": "org-GLHrIv00VVN9dEQC2b4wsBkf"
}
]
},
{
"object": "bucket",
"start_time": 1737590400,
"end_time": 1737676800,
"results": [
{
"object": "organization.costs.result",
"amount": {
"value": 104.88761235323427,
"currency": "usd"
},
"line_item": null,
"project_id": null,
"organization_id": "org-GLHrIv00VVN9dEQC2b4wsBkf"
}
]
},
{
"object": "bucket",
"start_time": 1737676800,
"end_time": 1737763200,
"results": [
{
"object": "organization.costs.result",
"amount": {
"value": 0.3376030385950106,
"currency": "usd"
},
"line_item": null,
"project_id": null,
"organization_id": "org-GLHrIv00VVN9dEQC2b4wsBkf"
}
]
},
{
"object": "bucket",
"start_time": 1737763200,
"end_time": 1737849600,
"results": [
{
"object": "organization.costs.result",
"amount": {
"value": 0.062551042553524,
"currency": "usd"
},
"line_item": null,
"project_id": null,
"organization_id": "org-GLHrIv00VVN9dEQC2b4wsBkf"
}
]
},
{
"object": "bucket",
"start_time": 1737849600,
"end_time": 1737936000,
"results": [
{
"object": "organization.costs.result",
"amount": {
"value": 0.00032195744715549047,
"currency": "usd"
},
"line_item": null,
"project_id": null,
"organization_id": "org-GLHrIv00VVN9dEQC2b4wsBkf"
}
]
},
{
"object": "bucket",
"start_time": 1737936000,
"end_time": 1738022400,
"results": [
{
"object": "organization.costs.result",
"amount": {
"value": 0.0003084210662774742,
"currency": "usd"
},
"line_item": null,
"project_id": null,
"organization_id": "org-GLHrIv00VVN9dEQC2b4wsBkf"
}
]
},
{
"object": "bucket",
"start_time": 1738022400,
"end_time": 1738108800,
"results": [
{
"object": "organization.costs.result",
"amount": {
"value": 0.00032195744715549047,
"currency": "usd"
},
"line_item": null,
"project_id": null,
"organization_id": "org-GLHrIv00VVN9dEQC2b4wsBkf"
}
]
},
{
"object": "bucket",
"start_time": 1738108800,
"end_time": 1738195200,
"results": [
{
"object": "organization.costs.result",
"amount": {
"value": 0.5142559074471554,
"currency": "usd"
},
"line_item": null,
"project_id": null,
"organization_id": "org-GLHrIv00VVN9dEQC2b4wsBkf"
}
]
},
{
"object": "bucket",
"start_time": 1738195200,
"end_time": 1738281600,
"results": [
{
"object": "organization.costs.result",
"amount": {
"value": 0.21870350744715547,
"currency": "usd"
},
"line_item": null,
"project_id": null,
"organization_id": "org-GLHrIv00VVN9dEQC2b4wsBkf"
}
]
},
{
"object": "bucket",
"start_time": 1738281600,
"end_time": 1738368000,
"results": [
{
"object": "organization.costs.result",
"amount": {
"value": 1.4528752074471551,
"currency": "usd"
},
"line_item": null,
"project_id": null,
"organization_id": "org-GLHrIv00VVN9dEQC2b4wsBkf"
}
]
},
{
"object": "bucket",
"start_time": 1738368000,
"end_time": 1738454400,
"results": [
{
"object": "organization.costs.result",
"amount": {
"value": 0.00042714787262957543,
"currency": "usd"
},
"line_item": null,
"project_id": null,
"organization_id": "org-GLHrIv00VVN9dEQC2b4wsBkf"
}
]
},
{
"object": "bucket",
"start_time": 1738454400,
"end_time": 1738540800,
"results": [
{
"object": "organization.costs.result",
"amount": {
"value": 0.00032195744715549047,
"currency": "usd"
},
"line_item": null,
"project_id": null,
"organization_id": "org-GLHrIv00VVN9dEQC2b4wsBkf"
}
]
},
{
"object": "bucket",
"start_time": 1738540800,
"end_time": 1738627200,
"results": [
{
"object": "organization.costs.result",
"amount": {
"value": 0.0031147346857709622,
"currency": "usd"
},
"line_item": null,
"project_id": null,
"organization_id": "org-GLHrIv00VVN9dEQC2b4wsBkf"
}
]
},
{
"object": "bucket",
"start_time": 1738627200,
"end_time": 1738713600,
"results": [
{
"object": "organization.costs.result",
"amount": {
"value": 68.30023964957941,
"currency": "usd"
},
"line_item": null,
"project_id": null,
"organization_id": "org-GLHrIv00VVN9dEQC2b4wsBkf"
}
]
},
{
"object": "bucket",
"start_time": 1738713600,
"end_time": 1738800000,
"results": [
{
"object": "organization.costs.result",
"amount": {
"value": 14.858330207447157,
"currency": "usd"
},
"line_item": null,
"project_id": null,
"organization_id": "org-GLHrIv00VVN9dEQC2b4wsBkf"
}
]
},
{
"object": "bucket",
"start_time": 1738800000,
"end_time": 1738886400,
"results": [
{
"object": "organization.costs.result",
"amount": {
"value": 0.3137180574471555,
"currency": "usd"
},
"line_item": null,
"project_id": null,
"organization_id": "org-GLHrIv00VVN9dEQC2b4wsBkf"
}
]
},
{
"object": "bucket",
"start_time": 1738886400,
"end_time": 1738972800,
"results": [
{
"object": "organization.costs.result",
"amount": {
"value": 0.02677460744715549,
"currency": "usd"
},
"line_item": null,
"project_id": null,
"organization_id": "org-GLHrIv00VVN9dEQC2b4wsBkf"
}
]
},
{
"object": "bucket",
"start_time": 1738972800,
"end_time": 1739059200,
"results": [
{
"object": "organization.costs.result",
"amount": {
"value": 0.007399792553524012,
"currency": "usd"
},
"line_item": null,
"project_id": null,
"organization_id": "org-GLHrIv00VVN9dEQC2b4wsBkf"
}
]
},
{
"object": "bucket",
"start_time": 1739059200,
"end_time": 1739145600,
"results": [
{
"object": "organization.costs.result",
"amount": {
"value": 0.00032195744715549047,
"currency": "usd"
},
"line_item": null,
"project_id": null,
"organization_id": "org-GLHrIv00VVN9dEQC2b4wsBkf"
}
]
},
{
"object": "bucket",
"start_time": 1739145600,
"end_time": 1739232000,
"results": [
{
"object": "organization.costs.result",
"amount": {
"value": 0.00012073404268330895,
"currency": "usd"
},
"line_item": null,
"project_id": null,
"organization_id": "org-GLHrIv00VVN9dEQC2b4wsBkf"
}
]
}
]
解析 Costs API 响应并创建 DataFrame
我们现在将解析来自 Costs API 的 JSON 数据,提取相关字段,并创建一个 pandas DataFrame 以便进一步分析。
# Flatten the bucketed Costs API response into one flat record per result.
# Each bucket carries a time window; each result inside it carries an amount.
cost_records = [
    {
        "start_time": bucket.get("start_time"),
        "end_time": bucket.get("end_time"),
        "amount_value": result.get("amount", {}).get("value", 0),
        "currency": result.get("amount", {}).get("currency", "usd"),
        "line_item": result.get("line_item"),
        "project_id": result.get("project_id"),
    }
    for bucket in all_costs_data
    for result in bucket.get("results", [])
]

# Build a DataFrame from the flattened cost records.
cost_df = pd.DataFrame(cost_records)

# Add human-readable datetime columns alongside the raw Unix timestamps.
cost_df["start_datetime"] = pd.to_datetime(cost_df["start_time"], unit="s")
cost_df["end_datetime"] = pd.to_datetime(cost_df["end_time"], unit="s")

# Preview the first few rows.
cost_df.head()
start_time | end_time | amount_value | currency | line_item | project_id | start_datetime | end_datetime | |
---|---|---|---|---|---|---|---|---|
0 | 1736553600 | 1736640000 | 0.130804 | usd | None | None | 2025-01-11 | 2025-01-12 |
1 | 1736640000 | 1736726400 | 0.122704 | usd | None | None | 2025-01-12 | 2025-01-13 |
2 | 1736726400 | 1736812800 | 9.888144 | usd | None | None | 2025-01-13 | 2025-01-14 |
3 | 1736812800 | 1736899200 | 0.350764 | usd | None | None | 2025-01-14 | 2025-01-15 |
4 | 1736899200 | 1736985600 | 0.297748 | usd | None | None | 2025-01-15 | 2025-01-16 |
可视化每日总成本
我们将创建一个条形图来可视化按天汇总的总成本。这有助于从高层视角了解组织支出情况。
if not cost_df.empty:
    # Make sure 'start_datetime' exists and really is a datetime column.
    missing_or_wrong_dtype = (
        "start_datetime" not in cost_df.columns
        or not pd.api.types.is_datetime64_any_dtype(cost_df["start_datetime"])
    )
    if missing_or_wrong_dtype:
        cost_df["start_datetime"] = pd.to_datetime(
            cost_df["start_time"], unit="s", errors="coerce"
        )

    # Derive a calendar-date column for daily aggregation.
    cost_df["date"] = cost_df["start_datetime"].dt.date

    # Total spend per day.
    daily_totals = cost_df.groupby("date")["amount_value"].sum().reset_index()

    # Bar chart of total cost per day.
    plt.figure(figsize=(12, 6))
    plt.bar(
        daily_totals["date"],
        daily_totals["amount_value"],
        width=0.6,
        color="skyblue",
        alpha=0.8,
    )
    plt.xlabel("Date")
    plt.ylabel("Total Cost (USD)")
    plt.title("Total Cost per Day (Last 30 Days)")
    plt.xticks(rotation=45, ha="right")
    plt.tight_layout()
    plt.show()
else:
    print("No cost data available to plot.")
按行项目可视化成本
我们将创建一个条形图来可视化按行项目汇总的总成本。这有助于识别哪些类别(例如,模型或其他服务)对支出贡献最大。
# Query window: the last 30 days, expressed in Unix seconds.
days_ago = 30
start_time = int(time.time()) - (days_ago * 24 * 60 * 60)

# Costs API parameters: daily buckets, grouped by line item.
costs_params = {
    "start_time": start_time,  # Required: start time (Unix seconds)
    "bucket_width": "1d",  # Optional: currently only '1d' is supported
    "limit": 30,  # Optional: number of buckets to return
    "group_by": ["line_item"],
}

# Fetch the paginated, line-item-grouped cost data.
line_item_cost_data = get_data(costs_url, costs_params)

# Flatten the buckets into one record per (bucket, result) pair.
cost_records = []
for bucket in line_item_cost_data:
    window_start = bucket.get("start_time")
    window_end = bucket.get("end_time")
    for result in bucket.get("results", []):
        amount = result.get("amount", {})
        cost_records.append(
            {
                "start_time": window_start,
                "end_time": window_end,
                "amount_value": amount.get("value", 0),
                "currency": amount.get("currency", "usd"),
                "line_item": result.get("line_item"),
                "project_id": result.get("project_id"),
            }
        )

# Build a DataFrame from the cost records.
cost_df = pd.DataFrame(cost_records)

# Convert the Unix timestamps into readable datetime columns.
cost_df["start_datetime"] = pd.to_datetime(cost_df["start_time"], unit="s")
cost_df["end_datetime"] = pd.to_datetime(cost_df["end_time"], unit="s")

# Preview the first few rows.
cost_df.head()
Data retrieved successfully!
start_time | end_time | amount_value | currency | line_item | project_id | start_datetime | end_datetime | |
---|---|---|---|---|---|---|---|---|
0 | 1736553600 | 1736640000 | 0.127440 | usd | ft-gpt-4o-2024-08-06, input | proj_hNhhQzyYu7HxySZWs7cA3Ugu | 2025-01-11 | 2025-01-12 |
1 | 1736553600 | 1736640000 | 0.003090 | usd | ft-gpt-4o-2024-08-06, output | proj_hNhhQzyYu7HxySZWs7cA3Ugu | 2025-01-11 | 2025-01-12 |
2 | 1736553600 | 1736640000 | 0.000271 | usd | assistants api | file search | proj_L67gOme4S2nBA8aQieEOwLy7 | 2025-01-11 | 2025-01-12 |
3 | 1736553600 | 1736640000 | 0.000003 | usd | assistants api | file search | proj_VV4ZAjd6ALfFd9uh0vY8joR1 | 2025-01-11 | 2025-01-12 |
4 | 1736640000 | 1736726400 | 0.028607 | usd | evals | gpt-4o-mini-2024-07-18, input | proj_L67gOme4S2nBA8aQieEOwLy7 | 2025-01-12 | 2025-01-13 |
if not cost_df.empty:
    # Guarantee a proper datetime column before deriving dates.
    if (
        "start_datetime" not in cost_df.columns
        or not pd.api.types.is_datetime64_any_dtype(cost_df["start_datetime"])
    ):
        cost_df["start_datetime"] = pd.to_datetime(
            cost_df["start_time"], unit="s", errors="coerce"
        )

    # Calendar-date column for per-day grouping.
    cost_df["date"] = cost_df["start_datetime"].dt.date

    # Daily totals per line item, pivoted to wide form so each date
    # becomes one stacked bar with a segment per line item.
    cost_per_day = (
        cost_df.groupby(["date", "line_item"])["amount_value"].sum().reset_index()
    )
    cost_pivot = (
        cost_per_day.pivot(index="date", columns="line_item", values="amount_value")
        .fillna(0)
        .sort_index()
    )

    # Stacked bar chart: one bar per date.
    plt.figure(figsize=(12, 6))
    ax = cost_pivot.plot(kind="bar", stacked=True, ax=plt.gca(), width=0.8)
    plt.xlabel("Date")
    plt.ylabel("Total Cost (USD)")
    plt.title("Total Cost by Line Item")
    plt.xticks(rotation=45, ha="right")
    # Place the legend outside the axes so it does not cover the bars.
    plt.legend(bbox_to_anchor=(1.05, 1), loc="upper left", borderaxespad=0.)
    plt.tight_layout()
    plt.show()
else:
    print("No cost data available to plot.")
/var/folders/r_/g8r2dz8s2qd104th5p5yxljr0000gp/T/ipykernel_49468/2813361465.py:25: UserWarning: Tight layout not applied. The bottom and top margins cannot be made large enough to accommodate all Axes decorations.
plt.tight_layout()
其他可视化(可选)
您可以扩展此 notebook,为 Completions 和 Costs API 添加更多可视化。例如:
Completions API:
- 按用户、项目或模型分组,查看哪些消耗的 token 最多。
- 为每日或每小时 token 使用情况的时间序列分析创建折线图。
- 使用饼图可视化跨模型、用户或项目的 token 分布。
- 尝试不同的
group_by
参数(例如["model", "user_id"]
)以获得更深入的见解。
Costs API:
- 按项目或行项目分组以识别支出模式。
- 创建折线图或条形图以可视化每日成本趋势。
- 使用饼图显示成本在项目、服务或行项目之间的分布情况。
- 尝试各种
group_by
选项(例如["project_id"]
、["line_item"]
)以进行精细分析。
使用 pandas
和 matplotlib
(或 Plotly/Bokeh 等库)尝试不同的参数和可视化技术以获得更深入的见解,并考虑将这些可视化集成到交互式仪表板中以进行实时监控。
与第三方仪表板平台集成
要将 OpenAI 使用和成本数据引入 Tableau、Power BI 或自定义平台(例如 Plotly Dash、Bokeh)等外部仪表板工具,请遵循以下步骤:
-
数据收集与准备: - 使用 Python 脚本定期从 Completions 和 Costs API 获取数据。 - 使用 pandas 处理和聚合数据,然后将其存储在数据库、数据仓库中,或导出为 CSV/JSON 文件。
-
连接到仪表板: - BI 工具(Tableau、Power BI):
- 直接连接到准备好的数据源(SQL 数据库、CSV 文件或 Web API)。
- 使用内置连接器安排数据刷新,确保仪表板始终显示最新信息。
- 自定义仪表板(Plotly Dash、Bokeh):
- 将 API 调用和数据处理嵌入到仪表板代码中。
- 构建交互式可视化组件,这些组件在获取新数据时自动更新。
-
实时和自动化更新: - 使用 cron 作业、任务计划程序或工作流工具(例如 Apache Airflow)安排脚本定期刷新数据。 - 实现 Webhook 或流式 API(如果可用)以实现近乎实时的数据更新。
通过将 API 数据集成到第三方平台,您可以创建交互式、实时的仪表板,将 OpenAI 指标与其他业务数据相结合,从而提供全面的见解和自动化监控。