【星海随笔】Python-JSON数据的处理-EW帮帮网

JSON 是一种轻量级的数据交换格式，主要用于在客户端和服务器之间传输数据。

json 是 Python 标准库自带的模块，无需额外安装，直接 import json 即可使用。
https://www.jyshare.com/compile/9/

import json
# Serialize: Python dict -> JSON text.
data = dict(name='Alice', age=30, city='New York')
json_string = json.dumps(data)
print(json_string)

# Deserialize: JSON text -> Python dict.
json_data = '{"name": "Bob", "age": 25, "city": "Los Angeles"}'
python_obj = json.loads(json_data)
print(python_obj)

{"name": "Alice", "age": 30, "city": "New York"} #Json格式
{'name': 'Bob', 'age': 25, 'city': 'Los Angeles'} #Python对象

#注意：JSON 字符串必须使用双引号。如果用 str() 把字典转成字符串（得到的是单引号形式），json.loads 无法将其解析回 dict，会抛出 json.JSONDecodeError。

json.dumps(obj): 将 Python 对象转换为 JSON 格式的字符串。
json.loads(s): 将 JSON 格式的字符串转换为 Python 对象。
json.dump(obj, fp): 将 Python 对象写入文件（以 JSON 格式）。
json.load(fp): 从文件中读取 JSON 格式的数据并转换为 Python 对象。

注：JSON 只支持少数几种基本类型，json 模块无法直接序列化 datetime 对象，需要对 datetime 进行自定义处理。例如：继承 json.JSONEncoder 并重写 default 方法（如下面的 DateTimeEncoder），或者先把 datetime 转化为 str 。
方法1

import json
from datetime import datetime
 
class DateTimeEncoder(json.JSONEncoder):
    """JSON encoder that renders ``datetime`` values as ISO 8601 strings."""

    def default(self, obj):
        """Return a JSON-serializable stand-in for *obj*.

        ``datetime`` instances become the string from ``isoformat()``; every
        other type is handed to ``JSONEncoder.default``.
        """
        if not isinstance(obj, datetime):
            # Not ours to handle: defer to the base class.
            return super().default(obj)
        return obj.isoformat()  # any other string format could be used here
 
# Sample payload whose 'timestamp' value is a datetime object.
data = {
    'name': 'Alice',
    'timestamp': datetime.now()
}
 
# cls=DateTimeEncoder makes json.dumps use the custom encoder class, whose
# default() turns the datetime into an ISO-format string.
json_str = json.dumps(data, cls=DateTimeEncoder)
print(json_str)

方法2

from datetime import datetime, timezone
# Build a timezone-aware datetime (UTC).
dt_with_tz = datetime(2023, 10, 5, 14, 48, 0, tzinfo=timezone.utc)
 
# isoformat() renders it as a string, including the UTC offset.
dt_str_with_tz = dt_with_tz.isoformat()
print(dt_str_with_tz)  # prints: 2023-10-05T14:48:00+00:00
 
# Build a naive datetime (no tzinfo).
dt_without_tz = datetime(2023, 10, 5, 14, 48, 0)
 
# With no tzinfo, isoformat() emits no offset suffix.
dt_str_without_tz = dt_without_tz.isoformat()
print(dt_str_without_tz)  # prints: 2023-10-05T14:48:00

Supports six types of data structures

字符串（string）
数字（number）
对象（object，即键值对）
数组（array）
布尔值（true/false）
空值（null）

check source code

import inspect
import json
 
# Show where the json package's source file lives (only meaningful for
# modules implemented in pure Python).
print(json.__file__)
 
# Print the source of json.dumps.
# inspect.getsource raises TypeError for built-in (C-implemented) objects and
# OSError when the source text cannot be retrieved, so both are handled.
try:
    print(inspect.getsource(json.dumps))
except (TypeError, OSError):
    print("该函数可能是用 C 实现的，无法直接查看源码。")

Source Code

/usr/local/python-3.8.1/lib/python3.8/json/__init__.py
def dumps(obj, *, skipkeys=False, ensure_ascii=True, check_circular=True,
        allow_nan=True, cls=None, indent=None, separators=None,
        default=None, sort_keys=False, **kw):
    """Serialize ``obj`` to a JSON formatted ``str``.

    If ``skipkeys`` is true then ``dict`` keys that are not basic types
    (``str``, ``int``, ``float``, ``bool``, ``None``) will be skipped
    instead of raising a ``TypeError``.

    If ``ensure_ascii`` is false, then the return value can contain non-ASCII
    characters if they appear in strings contained in ``obj``. Otherwise, all
    such characters are escaped in JSON strings.

    If ``check_circular`` is false, then the circular reference check
    for container types will be skipped and a circular reference will
    result in an ``OverflowError`` (or worse).

    If ``allow_nan`` is false, then it will be a ``ValueError`` to
    serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in
    strict compliance of the JSON specification, instead of using the
    JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).

    If ``indent`` is a non-negative integer, then JSON array elements and
    object members will be pretty-printed with that indent level. An indent
    level of 0 will only insert newlines. ``None`` is the most compact
    representation.

    If specified, ``separators`` should be an ``(item_separator, key_separator)``
    tuple.  The default is ``(', ', ': ')`` if *indent* is ``None`` and
    ``(',', ': ')`` otherwise.  To get the most compact JSON representation,
    you should specify ``(',', ':')`` to eliminate whitespace.

    ``default(obj)`` is a function that should return a serializable version
    of obj or raise TypeError. The default simply raises TypeError.

    If *sort_keys* is true (default: ``False``), then the output of
    dictionaries will be sorted by key.

    To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
    ``.default()`` method to serialize additional types), specify it with
    the ``cls`` kwarg; otherwise ``JSONEncoder`` is used.

    """
    # cached encoder
    if (not skipkeys and ensure_ascii and
        check_circular and allow_nan and
        cls is None and indent is None and separators is None and
        default is None and not sort_keys and not kw):
        return _default_encoder.encode(obj)
    if cls is None:
        cls = JSONEncoder
    return cls(
        skipkeys=skipkeys, ensure_ascii=ensure_ascii,
        check_circular=check_circular, allow_nan=allow_nan, indent=indent,
        separators=separators, default=default, sort_keys=sort_keys,
        **kw).encode(obj)

skipkeys

Default is False

skipkeys 为 True 时，遇到非基本类型（str、int、float、bool、None 以外）的 key 会直接跳过该键值对，而不是抛出 TypeError。

import json
 
# Start with keys of the basic types JSON can represent as object keys ...
data = {
    "name": "Alice",
    42: "The answer",
    3.14: "Pi",
    True: "Boolean True",
    None: "Null value",
}
# ... then add a tuple key, which is not a basic type.
data[("tuple", "key")] = "This will be skipped"
 
# skipkeys=False (the default): the tuple key makes json.dumps raise TypeError.
try:
    json_str_default = json.dumps(data)
except TypeError as err:
    print("Error with skipkeys=False:", err)
 
# skipkeys=True: the tuple-keyed entry is dropped instead of raising.
json_str_skipkeys = json.dumps(data, skipkeys=True)
 
# Show the result (header and payload on separate lines).
print("\nJSON string with skipkeys=True:", json_str_skipkeys, sep="\n")

# Same mapping without the tuple key: every key is now a basic type
# (str, int, float, bool, None), so the default skipkeys=False succeeds.
data = {
    "name": "Alice",
    42: "The answer",
    3.14: "Pi",
    True: "Boolean True",
    None: "Null value"
}
json_str_default = json.dumps(data)
# Print the string computed just above. The earlier version printed
# json_str_skipkeys here, so this second serialization was never shown.
print(json_str_default)

result

Error with skipkeys=False: keys must be str, int, float, bool or None, not tuple
JSON string with skipkeys=True:
{"name": "Alice", "42": "The answer", "3.14": "Pi", "true": "Boolean True", "null": "Null value"}
{"name": "Alice", "42": "The answer", "3.14": "Pi", "true": "Boolean True", "null": "Null value"}

ensure_ascii

Default is True

import json
 
# Sample payload containing non-ASCII (Chinese) text.
data = dict(code=1001, name="电波发", description="这是一个测试")
 
# Case 1: ensure_ascii=False keeps non-ASCII characters as they are.
print("ensure_ascii=False 的输出:")
json_str_no_escape = json.dumps(data, indent=4, ensure_ascii=False)
print(json_str_no_escape)
 
# Case 2: ensure_ascii=True (the default) escapes them as \uXXXX sequences.
print("\nensure_ascii=True 的输出:")
json_str_escape = json.dumps(data, indent=4, ensure_ascii=True)
print(json_str_escape)

result

{
    "code": 1001,
    "name": "电波发",
    "description": "这是一个测试"
}

{
    "code": 1001,
    "name": "\u7535\u6ce2\u53d1",
    "description": "\u8fd9\u662f\u4e00\u4e2a\u6d4b\u8bd5"
}

check_circular

默认行为（check_circular=True）：

import json
 
# Build a dict that contains a reference to itself.
data = {}
data['self'] = data  # the dict is now one of its own values: a circular reference
 
# Try to serialize with check_circular=True (the default).
try:
    json_str_default = json.dumps(data, check_circular=True)  # default behaviour; the check_circular argument could be omitted
except ValueError as e:
    print("Error with check_circular=True:", e)
 
# Try to serialize with check_circular=False.
try:
    json_str_skip_check = json.dumps(data, check_circular=False)
except (OverflowError, RecursionError) as e:  # either OverflowError or RecursionError may be raised
    print("Error with check_circular=False:", e)

result

Error with check_circular=True: Circular reference detected
# （check_circular=True 时，编码器一旦检测到循环引用就会抛出 ValueError，
# 从而阻止序列化继续进行）
 
Error with check_circular=False: maximum recursion depth exceeded while calling a Python object
# （或 OverflowError，具体取决于 Python 解释器和环境，
# 核心是未检测循环引用导致无限递归）

allow_nan

Default is True

float('nan') 表示 NaN（Not a Number）。
float('inf') 表示正无穷大。
float('-inf') 表示负无穷大。
allow_nan=False 时，json.dumps 会检查数据中的所有浮点数。如果发现 NaN、Infinity 或 -Infinity，会引发 ValueError，因为这些值不符合 JSON 规范。

参数控制是否允许序列化非有限浮点数
当 allow_nan=False 时，如果尝试序列化这些值，会引发 ValueError，以严格遵守 JSON 规范（JSON 规范不支持这些特殊值）。

import json
 
# Payload mixing one finite float with the three non-finite float values.
data = {
    "finite_number": 1.23,
    "nan_value": float('nan'),
    "positive_infinity": float('inf'),
    "negative_infinity": float('-inf')
}
 
try:
    # Serialize with allow_nan=False. This is NOT the default (allow_nan
    # defaults to True); it makes non-finite floats raise ValueError.
    json_string = json.dumps(data, allow_nan=False)
except ValueError as e:
    print(f"ValueError: {e}")

ValueError: Out of range float values are not JSON compliant

当 allow_nan=True 时，json.dumps 会将这些特殊值序列化为它们的 JavaScript 等价形式：

float('nan') → NaN
float('inf') → Infinity
float('-inf') → -Infinity
（注意：输出中这些值不带引号，不是字符串，因此结果并不是严格符合 JSON 规范的文本。）

import json
 
# Payload mixing one finite float with the three non-finite float values.
data = dict(
    finite_number=1.23,
    nan_value=float('nan'),
    positive_infinity=float('inf'),
    negative_infinity=float('-inf'),
)
 
# allow_nan=True permits non-finite floats; they are written as the bare
# JavaScript tokens NaN / Infinity / -Infinity.
json_string = json.dumps(data, allow_nan=True)
print(json_string)

{"finite_number": 1.23, "nan_value": NaN, "positive_infinity": Infinity, "negative_infinity": -Infinity}

indent

选择缩进长度，默认为 None，即不含缩进和换行（最紧凑的表示）

Default is None

import json
 
# Sample payload with a nested list so indentation effects are visible.
data = dict(
    name="Alice",
    age=30,
    city="New York",
    skills=["Python", "Machine Learning", "Data Analysis"],
)
 
# indent is left at its default (None): everything stays on one line.
compact_json = json.dumps(data)
print("Compact JSON:", compact_json, sep="\n")

{"name": "Alice", "age": 30, "city": "New York", "skills": ["Python", "Machine Learning", "Data Analysis"]}

JSON with indent=0，则只增加换行符

{
"name": "Alice",
"age": 30,
"city": "New York",
"skills": [
"Python",
"Machine Learning",
"Data Analysis"
]
}

indent=2

{
  "name": "Alice",
  "age": 30,
  "city": "New York",
  "skills": [
    "Python",
    "Machine Learning",
    "Data Analysis"
  ]
}

cls

Default is None
json.JSONEncoder
cls 参数允许用户指定一个自定义的 JSON 编码器类，而不是使用默认的 json.JSONEncoder。

复写JSONEncoder方式

import json
 
class Person:
    """Plain record holding a person's name and age."""

    def __init__(self, name, age):
        # Store both constructor arguments on the instance unchanged.
        self.name, self.age = name, age
 
class PersonEncoder(json.JSONEncoder):
    """JSON encoder that serializes ``Person`` objects as name/age objects."""

    def default(self, obj):
        """Return a dict for a ``Person``; defer every other type to the base class."""
        if not isinstance(obj, Person):
            # Let JSONEncoder.default deal with anything that is not a Person.
            return super().default(obj)
        return {'name': obj.name, 'age': obj.age}
 
# Create a Person instance.
person = Person("Alice", 30)
 
# Serialize the Person with the custom encoder class.
json_str = json.dumps(person, cls=PersonEncoder)
print(json_str)  # prints: {"name": "Alice", "age": 30}

separators

indent 为 None 时默认是 (', ', ': ')，否则默认是 (',', ': ')
更换分隔符：第一个元素是各项之间的分隔符（默认为逗号 ,），第二个元素是键和值之间的分隔符（默认为冒号 :）
一般与 indent 配合使用

import json

# Sample payload with a nested list so both separators show up in the output.
data = {
    "name":"Alice",
    "age":30,
    "city":"New York",
    "skills":["Python", "Machine Learning", "Data Analysis"]
}

# 1. Default separators (indent=None)
#default_json = json.dumps(data)
#print("Default JSON (indent=None):")
#print(default_json)
# Output: {"name": "Alice", "age": 30, "city": "New York", "skills": ["Python", "Machine Learning", "Data Analysis"]}
#compact_json = json.dumps(data, indent=2,separators=(',','-'))
# Custom separators: '.' between items and '-' between each key and its value.
compact_json = json.dumps(data, indent=2,separators=('.','-'))
print(compact_json)

result

{
  "name"-"Alice".
  "age"-30.
  "city"-"New York".
  "skills"-[
    "Python".
    "Machine Learning".
    "Data Analysis"
  ]
}

sort_keys

If *sort_keys* is true (default: ``False``), then the output of
dictionaries will be sorted by key.

default

类似于 cls，可以自定义序列化方式，对不支持的类型进行扩展
区别在于：default 传入的是一个函数，cls 传入的是一个 JSONEncoder 子类

import json
 
# 定义一个自定义对象
class MyClass:
    """Minimal custom type that wraps a single ``value`` attribute."""

    def __init__(self, value):
        # Keep the constructor argument on the instance unchanged.
        self.value = value
 
# Without a default= hook, json.dumps raises TypeError for a MyClass instance.
obj = MyClass(42)
try:
    json.dumps(obj)  # attempt to serialize the custom object
except TypeError as e:
    print(f"默认行为: {e}")
 
# 自定义 default 函数
def custom_default(obj):
    """Serialization hook for ``json.dumps(default=...)``.

    Returns a tagged dict for ``MyClass`` instances and raises ``TypeError``
    for every other type.
    """
    if not isinstance(obj, MyClass):
        raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")
    return {"type": "MyClass", "value": obj.value}
 
# Serialize again, this time passing the custom default function.
try:
    serialized = json.dumps(obj, default=custom_default)
    print(f"自定义序列化结果: {serialized}")
except TypeError as e:
    print(f"自定义行为失败: {e}")

【星海随笔】Python-JSON数据的处理

Source Code

skipkeys

Default is False

result

ensure_ascii

Default is True

result

check_circular

result

allow_nan

Default is True

indent

Default is None

cls

separators

result

sort_keys

default

网站公告

今日签到

热门文章

最新发布