Skip to content

Update sources.py to reflect accurate datetime settings #31

@VKeff

Description

@VKeff

Example of error:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
[<ipython-input-2-5d8bc327a36c>](https://localhost:8080/#) in <cell line: 0>()
----> 1 bdf = BolideDataFrame(source='glm')

11 frames
[/usr/local/lib/python3.11/dist-packages/bolides/bdf.py](https://localhost:8080/#) in __init__(self, *args, **kwargs)
     99         if source in ['website', 'glm']:
    100             source = 'glm'
--> 101             init_gdf = glm_website()
    102 
    103         elif source == 'usg':

[/usr/local/lib/python3.11/dist-packages/bolides/sources.py](https://localhost:8080/#) in glm_website()
     18     # create DataFrame using JSON data
     19     df = pd.DataFrame(json['data'])
---> 20     df["datetime"] = df["datetime"].astype("datetime64")
     21 
     22     # add bolide energy data

[/usr/local/lib/python3.11/dist-packages/pandas/core/generic.py](https://localhost:8080/#) in astype(self, dtype, copy, errors)
   6641         else:
   6642             # else, only a single dtype is given
-> 6643             new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors)
   6644             res = self._constructor_from_mgr(new_data, axes=new_data.axes)
   6645             return res.__finalize__(self, method="astype")

[/usr/local/lib/python3.11/dist-packages/pandas/core/internals/managers.py](https://localhost:8080/#) in astype(self, dtype, copy, errors)
    428             copy = False
    429 
--> 430         return self.apply(
    431             "astype",
    432             dtype=dtype,

[/usr/local/lib/python3.11/dist-packages/pandas/core/internals/managers.py](https://localhost:8080/#) in apply(self, f, align_keys, **kwargs)
    361                 applied = b.apply(f, **kwargs)
    362             else:
--> 363                 applied = getattr(b, f)(**kwargs)
    364             result_blocks = extend_blocks(applied, result_blocks)
    365 

[/usr/local/lib/python3.11/dist-packages/pandas/core/internals/blocks.py](https://localhost:8080/#) in astype(self, dtype, copy, errors, using_cow, squeeze)
    756             values = values[0, :]  # type: ignore[call-overload]
    757 
--> 758         new_values = astype_array_safe(values, dtype, copy=copy, errors=errors)
    759 
    760         new_values = maybe_coerce_values(new_values)

[/usr/local/lib/python3.11/dist-packages/pandas/core/dtypes/astype.py](https://localhost:8080/#) in astype_array_safe(values, dtype, copy, errors)
    235 
    236     try:
--> 237         new_values = astype_array(values, dtype, copy=copy)
    238     except (ValueError, TypeError):
    239         # e.g. _astype_nansafe can fail on object-dtype of strings

[/usr/local/lib/python3.11/dist-packages/pandas/core/dtypes/astype.py](https://localhost:8080/#) in astype_array(values, dtype, copy)
    180 
    181     else:
--> 182         values = _astype_nansafe(values, dtype, copy=copy)
    183 
    184     # in pandas we don't store numpy str dtypes, so convert to object

[/usr/local/lib/python3.11/dist-packages/pandas/core/dtypes/astype.py](https://localhost:8080/#) in _astype_nansafe(arr, dtype, copy, skipna)
    108             from pandas.core.arrays import DatetimeArray
    109 
--> 110             dta = DatetimeArray._from_sequence(arr, dtype=dtype)
    111             return dta._ndarray
    112 

[/usr/local/lib/python3.11/dist-packages/pandas/core/arrays/datetimes.py](https://localhost:8080/#) in _from_sequence(cls, scalars, dtype, copy)
    325     @classmethod
    326     def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False):
--> 327         return cls._from_sequence_not_strict(scalars, dtype=dtype, copy=copy)
    328 
    329     @classmethod

[/usr/local/lib/python3.11/dist-packages/pandas/core/arrays/datetimes.py](https://localhost:8080/#) in _from_sequence_not_strict(cls, data, dtype, copy, tz, freq, dayfirst, yearfirst, ambiguous)
    352             tz = timezones.maybe_get_tz(tz)
    353 
--> 354         dtype = _validate_dt64_dtype(dtype)
    355         # if dtype has an embedded tz, capture it
    356         tz = _validate_tz_from_dtype(dtype, tz, explicit_tz_none)

[/usr/local/lib/python3.11/dist-packages/pandas/core/arrays/datetimes.py](https://localhost:8080/#) in _validate_dt64_dtype(dtype)
   2542                 "Please pass in 'datetime64[ns]' instead."
   2543             )
-> 2544             raise ValueError(msg)
   2545 
   2546         if (

ValueError: Passing in 'datetime64' dtype with no precision is not allowed. Please pass in 'datetime64[ns]' instead.

Proposed solution:

Failing line: df["datetime"] = df["datetime"].astype("datetime64")
Solution: df["datetime"] = pd.to_datetime(df["datetime"], utc=True, unit='ns').dt.tz_localize(None)

The solution specifies nanoseconds as the pandas unit (datetime64[ns]), interprets the times as Coordinated Universal Time (UTC), and then removes the timezone information with .dt.tz_localize(None).
NOTE: This is not a perfect fix and may introduce timezone-specific errors. It works well for timezone-naive data collection; for timezone-aware use, a more detailed investigation of timezone-specific datetimes is recommended.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions