Repository: datamade/census
Branch: master
Commit: c64202b56490
Files: 14
Total size: 46.0 KB

Directory structure:
gitextract_87spxlep/

├── .github/
│   └── workflows/
│       └── python-package.yml
├── .gitignore
├── CITATION.cff
├── LICENSE
├── MANIFEST.in
├── README.rst
├── census/
│   ├── __init__.py
│   ├── core.py
│   └── tests/
│       ├── __init__.py
│       └── test_census.py
├── pyproject.toml
├── requirements.txt
├── setup.cfg
└── setup.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .github/workflows/python-package.yml
================================================
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: Python package

on:
  push:
    branches: [ master ]
    tags:
      - v*
  pull_request:
    branches: [ master ]

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']

    steps:
    - uses: actions/checkout@v3
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v4
      with:
        python-version: ${{ matrix.python-version }}
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        python -m pip install flake8 pytest
    - name: Install package
      run: pip install .
    - name: Lint with flake8
      run: |
        # stop the build if there are Python syntax errors or undefined names
        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
    - name: Test with pytest
      run: |
        pytest

  publish:
    needs: test
    if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
    runs-on: ubuntu-latest
    
    steps:
    - uses: actions/checkout@v3
    - name: Set up Python 3.12
      uses: actions/setup-python@v4
      with:
        python-version: 3.12
    - name: Build and publish
      env:
        TWINE_USERNAME: __token__
        TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
      run: |
        pip install setuptools twine wheel
        pip wheel -w dist --no-deps .
        python setup.py sdist
        twine upload dist/*


================================================
FILE: .gitignore
================================================
*.pyc
.DS_Store
.env
.cache/
.tox/
build/


================================================
FILE: CITATION.cff
================================================
cff-version: 1.2.0
message: "If you use this software, please cite it as below."
authors:
- family-names: "Carbaugh"
  given-names: "Jeremy"
- family-names: "Gregg"
  given-names: "Forest"
- name: "Contributors"
title: "census"
version: 0.8.26
date-released: 2026-02-07
url: "https://github.com/datamade/census"


================================================
FILE: LICENSE
================================================
Copyright (c) 2012, Sunlight Labs; 2017, DataMade

All rights reserved.

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright notice,
      this list of conditions and the following disclaimer in the documentation
      and/or other materials provided with the distribution.
    * Neither the name of Sunlight Labs or DataMade nor the names of its contributors may be
      used to endorse or promote products derived from this software without
      specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

================================================
FILE: MANIFEST.in
================================================
include LICENSE *.rst *.py

================================================
FILE: README.rst
================================================
======
census
======
.. image:: https://github.com/datamade/census/workflows/Python%20package/badge.svg
 

A simple wrapper for the United States Census Bureau's API.

Provides access to ACS and SF1 data sets.

Install
=======

::

    pip install census

You may also want to install a complementary library, `us <https://pypi.python.org/pypi/us>`_, which help you figure out the
`FIPS <https://en.wikipedia.org/wiki/Federal_Information_Processing_Standard_state_code>`_ codes for many geographies. We use it in the examples below.

::

   pip install us

Usage
=====

First, get yourself a `Census API key <https://api.census.gov/data/key_signup.html>`_.

::

    from census import Census
    from us import states

    c = Census("MY_API_KEY")
    c.acs5.get(('NAME', 'B25034_010E'),
              {'for': 'state:{}'.format(states.MD.fips)})

The call above will return the name of the geographic area and the number of
homes that were built before 1939 for the state of Maryland. Helper methods have
been created to simplify common geometry calls::

    c.acs5.state(('NAME', 'B25034_010E'), states.MD.fips)

Full details on geometries and the states module can be found below.

The get method is the core data access method on both the ACS and SF1 data sets.
The first parameter is either a single string column or a tuple of columns. The
second parameter is a geoemtry dict with a `for` key and on option `in` key. The
`for` argument accepts a `"*"` wildcard character or `Census.ALL`. The wildcard
is not valid for the `in` parameter.

By default, the year for a dataset is the most recent year available. To access earlier data,
pass a year parameter to the API call::

    c.acs5.state(('NAME', 'B25034_010E'), states.MD.fips, year=2010)

The default year may also be set client-wide::

    c = Census("MY_API_KEY", year=2010)


Detailed information about the API can be found at the `Census Data API User Guide <https://www.census.gov/data/developers/guidance/api-user-guide.html>`_.

Datasets
========

For each dataset, the first year listed is the default.

* acs5: `ACS 5 Year Estimates <https://www.census.gov/data/developers/data-sets/acs-5year.html>`_ (2024, 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016, 2015, 2014, 2013, 2012, 2011, 2010, 2009)
* acs3: `ACS 3 Year Estimates <https://www.census.gov/data/developers/data-sets/acs-3year.html>`_ (2013, 2012)
* acs1: `ACS 1 Year Estimates <https://www.census.gov/data/developers/data-sets/acs-1year.html>`_ (2024, 2023, 2022, 2021, 2019, 2018, 2017, 2016, 2015, 2014, 2013, 2012, 2011)
* acs5dp: `ACS 5 Year Estimates, Data Profiles  <https://www.census.gov/data/developers/data-sets/acs-5year.html>`_ (2023, 2022, 2021, 2019, 2018, 2017, 2016, 2015, 2014, 2013, 2012, 2011, 2010, 2009)
* acs3dp: `ACS 3 Year Estimates, Data Profiles <https://www.census.gov/data/developers/data-sets/acs-3year.html>`_ (2013, 2012)
* acs1dp: `ACS 1 Year Estimates, Data Profiles <https://www.census.gov/data/developers/data-sets/acs-1year.html>`_ (2024, 2023, 2022, 2021, 2019, 2018, 2017, 2016, 2015, 2014, 2013, 2012, 2011)
* acs5st: `ACS 5 Year Estimates, Subject Tables <https://www.census.gov/data/developers/data-sets/acs-5year.html>`_ (2024, 2023, 2022, 2021, 2019, 2018, 2017, 2016, 2015, 2014, 2013, 2012, 2011, 2010, 2009)
* sf1: `Census Summary File 1 <https://www.census.gov/data/datasets/2010/dec/summary-file-1.html>`_ (2010)
* pl: `Redistricting Data Summary File <https://www.census.gov/programs-surveys/decennial-census/about/rdo/summary-files.2020.html>`_ (2020, 2010, 2000) 


Geographies
===========

The API supports a wide range of geographic regions. The specification of these
can be quite complicated so a number of convenience methods are provided. Refer to the `Census API documentation <https://www.census.gov/data/developers/guidance/api-user-guide.html>`_
for more geographies beyond the convenience methods.

*Not all geographies are supported in all years. Calling a convenience method
with a year that is not supported will raise census.UnsupportedYearException.*

`Geographic relationship files <https://www.census.gov/geo/maps-data/data/relationship.html>`_ are provided on the Census developer site as a tool to help users compare the geographies from the 1990, 2000 and 2010 Censuses. From these files, data users may determine how geographies from one Census relate to those from the prior Census.

ACS5 Geographies
----------------

* state(fields, state_fips)
* state_county(fields, state_fips, county_fips)
* state_county_blockgroup(fields, state_fips, county_fips, blockgroup)
* state_county_subdivision(fields, state_fips, county_fips, subdiv_fips)
* state_county_tract(fields, state_fips, county_fips, tract)
* state_place(fields, state_fips, place)
* state_congressional_district(fields, state_fips, congressional_district)
* state_legislative_district_upper(fields, state_fips, legislative_district)
* state_legislative_district_lower(fields, state_fips, legislative_district)
* us(fields)
* state_zipcode(fields, state_fips, zip5)

ACS1 Geographies
----------------

* state(fields, state_fips)
* state_congressional_district(fields, state_fips, district)
* us(fields)

SF1 Geographies
---------------

* state(fields, state_fips)
* state_county(fields, state_fips, county_fips)
* state_county_subdivision(fields, state_fips, county_fips, subdiv_fips)
* state_county_tract(fields, state_fips, county_fips, tract)
* state_place(fields, state_fips, place)
* state_congressional_district(fields, state_fips, district)
* state_msa(fields, state_fips, msa)
* state_csa(fields, state_fips, csa)
* state_district_place(fields, state_fips, district, place)
* state_zipcode(fields, state_fips, zip5)

PL Geographies
--------------

* state(fields, state_fips)
* state_county(fields, state_fips, county_fips)
* state_county_subdivision(fields, state_fips, county_fips, subdiv_fips)
* state_county_tract(fields, state_fips, county_fips, tract)
* state_county_blockgroup(fields, state_fips, county_fips, blockgroup)
* state_place(fields, state_fips, place)
* state_congressional_district(fields, state_fips, district)
* state_legislative_district_upper(fields, state_fips, legislative_district)
* state_legislative_district_lower(fields, state_fips, legislative_district)

States
======

This package previously had a `census.states` module, but now uses the
*us* package. ::

    >>> from us import states
    >>> print states.MD.fips
    u'24'

Convert FIPS to state abbreviation using `lookup()`: ::

    >>> print states.lookup('24').abbr
    u'MD'


BYOS - Bring Your Own Session
=============================

If you'd prefer to use a custom configured requests.Session, you can pass it
to the Census constructor::

    s = requests.session()
    s.headers.update({'User-Agent': 'census-demo/0.0'})

    c = Census("MY_API_KEY", session=s)

You can also replace the session used by a specific data set::

    c.sf1.session = s


Examples
========

The geographic name for all census tracts for county 170 in Alaska::

    c.sf1.get('NAME', geo={'for': 'tract:*',
                           'in': 'state:{} county:170'.format(states.AK.fips)})

The same call using the `state_county_tract` convenience method::

    c.sf1.state_county_tract('NAME', states.AK.fips, '170', Census.ALL)

Total number of males age 5 - 9 for all states::

    c.acs5.get('B01001_004E', {'for': 'state:*'})

The same call using the state convenience method::

    c.acs5.state('B01001_004E', Census.ALL)

Don't know the list of tables in a survey, try this:

    c.acs5.tables()


================================================
FILE: census/__init__.py
================================================
from census.core import (Census, ALL, CensusException,
                         UnsupportedYearException)


================================================
FILE: census/core.py
================================================
import warnings
from functools import wraps, lru_cache
from importlib.metadata import version

__version__ = version('census')

ALL = '*'


def new_session(*args, **kwargs):
    import requests
    return requests.session(*args, **kwargs)


class APIKeyError(Exception):
    """ Invalid API key
    """

    def __init__(self, value):
        self.value = value

    def __str__(self):
        return repr(self.value)


def list_or_str(v):
    """ Convert a single value into a list.
    """
    if isinstance(v, (list, tuple)):
        return v
    return [v]


def float_or_str(v):
    try:
        return float(v)
    except ValueError:
        return str(v)


def supported_years(*years):
    def inner(func):
        @wraps(func)
        def wrapper(self, *args, **kwargs):
            year = kwargs.get('year', self.default_year)
            _years = years if years else self.years
            if int(year) not in _years:
                raise UnsupportedYearException(
                    'Geography is not available in {}. Available years include {}'.format(year, _years))
            return func(self, *args, **kwargs)
        return wrapper
    return inner


def retry_on_transient_error(func):

    def wrapper(self, *args, **kwargs):
        for _ in range(max(self.retries - 1, 0)):
            try:
                result = func(self, *args, **kwargs)
            except CensusException as e:
                if "There was an error while running your query.  We've logged the error and we'll correct it ASAP.  Sorry for the inconvenience." in str(e):
                    pass
                else:
                    raise
            else:
                return result
        else:
            return func(self, *args, **kwargs)

    return wrapper


def chunks(l, n):
    """Yield successive n-sized chunks from l."""
    for i in range(0, len(l), n):
        yield l[i:i + n]


def merge(dicts):
    return dict(item for d in dicts for item in d.items())


class CensusException(Exception):
    pass


class UnsupportedYearException(CensusException):
    pass


class Client(object):
    endpoint_url = 'https://api.census.gov/data/%s/%s'
    definitions_url = 'https://api.census.gov/data/%s/%s/variables.json'
    definition_url = 'https://api.census.gov/data/%s/%s/variables/%s.json'
    groups_url = 'https://api.census.gov/data/%s/%s/groups.json'

    def __init__(self, key, year=None, session=None, retries=3):
        self._key = key
        self.session = session or new_session()
        if year:
            self.default_year = year
        self.retries = retries

    def tables(self, year=None):
        """
        Returns a list of the data tables available from this source.
        """
        # Set the default year if one hasn't been passed
        if year is None:
            year = self.default_year

        # Query the table metadata as raw JSON
        tables_url = self.groups_url % (year, self.dataset)
        resp = self.session.get(tables_url)

        # Pass it out
        return resp.json()['groups']

    @supported_years()
    def fields(self, year=None, flat=False):
        if year is None:
            year = self.default_year

        data = {}

        fields_url = self.definitions_url % (year, self.dataset)

        resp = self.session.get(fields_url)
        obj = resp.json()

        if flat:

            for key, elem in obj['variables'].items():
                if key in ['for', 'in']:
                    continue
                data[key] = "{}: {}".format(elem['concept'], elem['label'])

        else:

            data = obj['variables']
            if 'for' in data:
                data.pop("for", None)
            if 'in' in data:
                data.pop("in", None)

        return data

    def get(self, fields, geo, year=None, **kwargs):
        """
        The API only accepts up to 50 fields on each query.
        Chunk requests, and use the unique GEO_ID to match up the chunks
        in case the responses are in different orders.
        GEO_ID is not reliably present in pre-2010 requests.
        """
        sort_by_geoid = len(fields) > 49 and (not year or year > 2009)
        all_results = (self.query(forty_nine_fields, geo, year, sort_by_geoid=sort_by_geoid, **kwargs)
                       for forty_nine_fields in chunks(fields, 49))
        merged_results = [merge(result) for result in zip(*all_results)]

        return merged_results

    @retry_on_transient_error
    def query(self, fields, geo, year=None, sort_by_geoid=False, **kwargs):
        if year is None:
            year = self.default_year

        fields = list_or_str(fields)
        if sort_by_geoid:
            if isinstance(fields, list):
                fields += ['GEO_ID']
            elif isinstance(fields, tuple):
                fields += ('GEO_ID',)

        url = self.endpoint_url % (year, self.dataset)

        params = {
            'get': ",".join(fields),
            'for': geo['for'],
            'key': self._key,
        }

        if 'in' in geo:
            params['in'] = geo['in']

        resp = self.session.get(url, params=params)

        if resp.status_code == 200:
            try:
                data = resp.json()
            except ValueError as ex:
                if '<title>Invalid Key</title>' in resp.text:
                    raise APIKeyError(' '.join(resp.text.splitlines()))
                else:
                    raise ex

            headers = data.pop(0)
            types = [self._field_type(header, year) for header in headers]
            results = [{header: (cast(item) if item is not None else None)
                        for header, cast, item
                        in zip(headers, types, d)}
                       for d in data]
            if sort_by_geoid:
                if 'GEO_ID' in fields:
                    results = sorted(results, key=lambda x: x['GEO_ID'])
                else:
                    results = sorted(results, key=lambda x: x.pop('GEO_ID'))
            return results

        elif resp.status_code == 204:
            return []

        else:
            raise CensusException(resp.text)

    @lru_cache(maxsize=1024)
    def _field_type(self, field, year):
        url = self.definition_url % (year, self.dataset, field)
        resp = self.session.get(url)

        types = {"fips-for": str,
                 "fips-in": str,
                 "int": float_or_str,
                 "long": float_or_str,
                 "float": float,
                 "string": str}

        if resp.status_code == 200:
            predicate_type = resp.json().get("predicateType", "string")
            return types[predicate_type]
        else:
            return str

    @supported_years()
    def us(self, fields, **kwargs):
        return self.get(fields, geo={'for': 'us:1'}, **kwargs)

    @supported_years()
    def state(self, fields, state_fips, **kwargs):
        return self.get(fields, geo={
            'for': 'state:{}'.format(state_fips),
        }, **kwargs)

    @supported_years()
    def state_county(self, fields, state_fips, county_fips, **kwargs):
        return self.get(fields, geo={
            'for': 'county:{}'.format(county_fips),
            'in': 'state:{}'.format(state_fips),
        }, **kwargs)

    @supported_years()
    def state_place(self, fields, state_fips, place, **kwargs):
        return self.get(fields, geo={
            'for': 'place:{}'.format(place),
            'in': 'state:{}'.format(state_fips),
        }, **kwargs)

    @supported_years()
    def state_district(self, fields, state_fips, district, **kwargs):
        warnings.warn(
            "state_district refers to congressional districts; use state_congressional_district instead",
            DeprecationWarning
        )

        # throwaway, but we can't pass it in twice.
        kwargs.pop('congressional_district', None)

        return self.state_congressional_district(fields, state_fips, district, **kwargs)

    @supported_years()
    def state_congressional_district(self, fields, state_fips, congressional_district, **kwargs):
        return self.get(fields, geo={
            'for': 'congressional district:{}'.format(congressional_district),
            'in': 'state:{}'.format(state_fips),
        }, **kwargs)

    @supported_years()
    def state_legislative_district_upper(self, fields, state_fips, legislative_district, **kwargs):
        return self.get(fields, geo={
            'for': 'state legislative district (upper chamber):{}'.format(str(legislative_district).zfill(3)),
            'in': 'state:{}'.format(state_fips),
        }, **kwargs)

    @supported_years()
    def state_legislative_district_lower(self, fields, state_fips, legislative_district, **kwargs):
        return self.get(fields, geo={
            'for': 'state legislative district (lower chamber):{}'.format(str(legislative_district).zfill(3)),
            'in': 'state:{}'.format(state_fips),
        }, **kwargs)
    @supported_years()
    def combined_statistical_area(self, fields, csa, **kwargs):
        return self.get(fields, geo={
            'for': 'combined statistical area:{}'.format(str(csa)),
        }, **kwargs)
    
    @supported_years()
    def msa(self, fields, msa, **kwargs):
        return self.get(fields, geo={
            'for': ('metropolitan statistical area/' +
                'micropolitan statistical area:{}'.format(msa)), 
        }, **kwargs)

class ACSClient(Client):

    def _switch_endpoints(self, year):

        if year >= 2005:
            self.endpoint_url = 'https://api.census.gov/data/%s/acs/%s'
            self.definitions_url = 'https://api.census.gov/data/%s/acs/%s/variables.json'
            self.definition_url = 'https://api.census.gov/data/%s/acs/%s/variables/%s.json'
            self.groups_url = 'https://api.census.gov/data/%s/acs/%s/groups.json'
        else:
            self.endpoint_url = super(ACSClient, self).endpoint_url
            self.definitions_url = super(ACSClient, self).definitions_url
            self.definition_url = super(ACSClient, self).definition_url
            self.groups_url = super(ACSClient, self).groups_url

    def tables(self, *args, **kwargs):
        self._switch_endpoints(kwargs.get('year', self.default_year))
        return super(ACSClient, self).tables(*args, **kwargs)

    def get(self, *args, **kwargs):
        self._switch_endpoints(kwargs.get('year', self.default_year))

        return super(ACSClient, self).get(*args, **kwargs)


class ACS5Client(ACSClient):

    default_year = 2024
    dataset = 'acs5'

    years = (2024, 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016, 2015, 2014, 2013, 2012, 2011, 2010, 2009)

    @supported_years()
    def state_county_subdivision(self, fields, state_fips,
                                 county_fips, subdiv_fips, **kwargs):
        return self.get(fields, geo={
            'for': 'county subdivision:{}'.format(subdiv_fips),
            'in': 'state:{} county:{}'.format(state_fips, county_fips),
        }, **kwargs)

    @supported_years()
    def state_county_tract(self, fields, state_fips,
                           county_fips, tract, **kwargs):
        return self.get(fields, geo={
            'for': 'tract:{}'.format(tract),
            'in': 'state:{} county:{}'.format(state_fips, county_fips),
        }, **kwargs)

    @supported_years()
    def state_county_blockgroup(self, fields, state_fips, county_fips,
                                blockgroup, tract=None, **kwargs):
        geo = {
            'for': 'block group:{}'.format(blockgroup),
            'in': 'state:{} county:{}'.format(state_fips, county_fips),
        }
        if tract:
            geo['in'] += ' tract:{}'.format(tract)
        return self.get(fields, geo=geo, **kwargs)

    @supported_years(2024, 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016, 2015, 2014, 2013, 2012, 2011)
    def zipcode(self, fields, zcta, **kwargs):
        warnings.warn(
            "zipcode has been deprecated; use state_zipcode instead",
            DeprecationWarning
        )

        state_fips = kwargs.pop('state_fips')

        return self.state_zipcode(fields, state_fips, zcta, **kwargs)

    @supported_years(2024, 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016, 2015, 2014, 2013, 2012, 2011)
    def state_zipcode(self, fields, state_fips, zcta, **kwargs):
        year = kwargs.get('year', self.default_year)
        geo = {
            'for': 'zip code tabulation area:{}'.format(zcta),
        }
        # for 2020 onward, we need to use "regionin" instead of "in" due to ZCTA's being nested under states 
        if year < 2020:
            geo["in"] = 'state:{}'.format(state_fips)
        else:
            geo["regionin"] = 'state:{}'.format(state_fips)
        return self.get(fields, geo, **kwargs)


class ACS5DpClient(ACS5Client):

    dataset = 'acs5/profile'


class ACS5StClient(ACS5Client):
    def _switch_endpoints(self, year):
        self.endpoint_url = 'https://api.census.gov/data/%s/acs/%s'
        self.definitions_url = 'https://api.census.gov/data/%s/acs/%s/variables.json'
        self.definition_url = 'https://api.census.gov/data/%s/acs/%s/variables/%s.json'
        self.groups_url = 'https://api.census.gov/data/%s/acs/%s/groups.json'

    dataset = 'acs5/subject'


class ACS3Client(ACSClient):

    default_year = 2013
    dataset = 'acs3'

    years = (2013, 2012)

    @supported_years()
    def state_county_subdivision(self, fields, state_fips,
                                 county_fips, subdiv_fips, **kwargs):
        return self.get(fields, geo={
            'for': 'county subdivision:{}'.format(subdiv_fips),
            'in': 'state:{} county:{}'.format(state_fips, county_fips),
        }, **kwargs)


class ACS3DpClient(ACS3Client):

    dataset = 'acs3/profile'


class ACS1Client(ACSClient):

    default_year = 2024
    dataset = 'acs1'

    years = (2024, 2023, 2022, 2021, 2019, 2018, 2017, 2016, 2015, 2014, 2013, 2012, 2011, 2010, 2009, 2008, 2007, 2006, 2005)

    @supported_years()
    def state_county_subdivision(self, fields, state_fips,
                                 county_fips, subdiv_fips, **kwargs):
        return self.get(fields, geo={
            'for': 'county subdivision:{}'.format(subdiv_fips),
            'in': 'state:{} county:{}'.format(state_fips, county_fips),
        }, **kwargs)


class ACS1DpClient(ACS1Client):

    dataset = 'acs1/profile'

    years = (2024, 2023, 2022, 2021, 2019, 2018, 2017, 2016, 2015, 2014, 2013, 2012)


class SF1Client(Client):

    default_year = 2010
    dataset = 'sf1'

    years = (2010,)

    def _switch_endpoints(self, year):

        self.endpoint_url = 'https://api.census.gov/data/%s/dec/%s'
        self.definitions_url = 'https://api.census.gov/data/%s/dec/%s/variables.json'
        self.definition_url = 'https://api.census.gov/data/%s/dec/%s/variables/%s.json'
        self.groups_url = 'https://api.census.gov/data/%s/dec/%s/groups.json'

    def tables(self, *args, **kwargs):
        self._switch_endpoints(kwargs.get('year', self.default_year))
        return super(SF1Client, self).tables(*args, **kwargs)

    def get(self, *args, **kwargs):
        self._switch_endpoints(kwargs.get('year', self.default_year))

        return super(SF1Client, self).get(*args, **kwargs)

    @supported_years()
    def state_county_subdivision(self, fields, state_fips,
                                 county_fips, subdiv_fips, **kwargs):
        return self.get(fields, geo={
            'for': 'county subdivision:{}'.format(subdiv_fips),
            'in': 'state:{} county:{}'.format(state_fips, county_fips),
        }, **kwargs)

    @supported_years()
    def state_county_tract(self, fields, state_fips,
                           county_fips, tract, **kwargs):
        return self.get(fields, geo={
            'for': 'tract:{}'.format(tract),
            'in': 'state:{} county:{}'.format(state_fips, county_fips),
        }, **kwargs)

    @supported_years()
    def state_county_blockgroup(self, fields, state_fips, county_fips,
                                blockgroup, tract=None, **kwargs):
        geo = {
            'for': 'block group:{}'.format(blockgroup),
            'in': 'state:{} county:{}'.format(state_fips, county_fips),
        }
        if tract:
            geo['in'] += ' tract:{}'.format(tract)
        return self.get(fields, geo=geo, **kwargs)

    @supported_years(2010)
    def state_msa(self, fields, state_fips, msa, **kwargs):
        return self.get(fields, geo={
            'for': ('metropolitan statistical area/' +
                    'micropolitan statistical area (or part):{}'.format(msa)),
            'in': 'state:{}'.format(state_fips),
        }, **kwargs)

    @supported_years(2010)
    def state_csa(self, fields, state_fips, csa, **kwargs):
        return self.get(fields, geo={
            'for': 'combined statistical area (or part):{}'.format(csa),
            'in': 'state:{}'.format(state_fips),
        }, **kwargs)

    @supported_years(2010)
    def state_district_place(self, fields, state_fips,
                             district, place, **kwargs):
        return self.get(fields, geo={
            'for': 'place/remainder (or part):{}'.format(place),
            'in': 'state:{} congressional district:{}'.format(
                state_fips, district),
        }, **kwargs)

    @supported_years(2010)
    def state_zipcode(self, fields, state_fips, zcta, **kwargs):
        return self.get(fields, geo={
            'for': 'zip code tabulation area (or part):{}'.format(zcta),
            'in': 'state:{}'.format(state_fips),
        }, **kwargs)


class PLClient(Client):

    default_year = 2020
    dataset = 'pl'

    years = (2020, 2010, 2000)

    def _switch_endpoints(self, year):

        self.endpoint_url = 'https://api.census.gov/data/%s/dec/%s'
        self.definitions_url = 'https://api.census.gov/data/%s/dec/%s/variables.json'
        self.definition_url = 'https://api.census.gov/data/%s/dec/%s/variables/%s.json'
        self.groups_url = 'https://api.census.gov/data/%s/dec/%s/groups.json'

    def tables(self, *args, **kwargs):
        self._switch_endpoints(kwargs.get('year', self.default_year))
        return super(PLClient, self).tables(*args, **kwargs)

    def get(self, *args, **kwargs):
        self._switch_endpoints(kwargs.get('year', self.default_year))

        return super(PLClient, self).get(*args, **kwargs)

    @supported_years()
    def state_county_subdivision(self, fields, state_fips,
                                 county_fips, subdiv_fips, **kwargs):
        return self.get(fields, geo={
            'for': 'county subdivision:{}'.format(subdiv_fips),
            'in': 'state:{} county:{}'.format(state_fips, county_fips),
        }, **kwargs)

    @supported_years()
    def state_county_tract(self, fields, state_fips,
                           county_fips, tract, **kwargs):
        return self.get(fields, geo={
            'for': 'tract:{}'.format(tract),
            'in': 'state:{} county:{}'.format(state_fips, county_fips),
        }, **kwargs)

    @supported_years()
    def state_county_blockgroup(self, fields, state_fips, county_fips,
                                blockgroup, tract=None, **kwargs):
        geo = {
            'for': 'block group:{}'.format(blockgroup),
            'in': 'state:{} county:{}'.format(state_fips, county_fips),
        }
        if tract:
            geo['in'] += ' tract:{}'.format(tract)
        return self.get(fields, geo=geo, **kwargs)


class Census(object):

    ALL = ALL

    def __init__(self, key, year=None, session=None):

        if not session:
            session = new_session()

        self.session = session
        self.session.headers.update({
            'User-Agent': ('python-census/{} '.format(__version__) +
                           'github.com/datamade/census')
        })

        self._acs = ACS5Client(key, year, session)  # deprecated
        self.acs5 = ACS5Client(key, year, session)
        self.acs3 = ACS3Client(key, year, session)
        self.acs1 = ACS1Client(key, year, session)
        self.acs5st = ACS5StClient(key, year, session)
        self.acs5dp = ACS5DpClient(key, year, session)
        self.acs3dp = ACS3DpClient(key, year, session)
        self.acs1dp = ACS1DpClient(key, year, session)
        self.sf1 = SF1Client(key, year, session)
        self.pl = PLClient(key, year, session)

    @property
    def acs(self):
        warnings.warn('Use acs5 instead of acs', DeprecationWarning)
        return self._acs


================================================
FILE: census/tests/__init__.py
================================================


================================================
FILE: census/tests/test_census.py
================================================
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import time
import unittest

from census.core import (
    Census, UnsupportedYearException)

KEY = os.environ.get('CENSUS_KEY', '')

CLIENTS = (
    ('acs5', (
        'us', 'state', 'state_county', 'state_county_subdivision',
        'state_county_tract', 'state_county_blockgroup',
        'state_place', 'state_district',
        'state_congressional_district',
        'state_legislative_district_upper',
        'state_legislative_district_lower', 'zipcode',
    )),
    ('acs1dp', (
        'us', 'state', 'state_congressional_district',
    )),
    ('sf1', (
        'state', 'state_county', 'state_county_subdivision',
        'state_county_tract', 'state_county_blockgroup',
        'state_place', 'state_congressional_district',
        'state_msa', 'state_csa', 'state_district_place',
        'state_zipcode',
    )),
    ('sf3', (
        'state', 'state_county', 'state_county_tract',
        'state_county_blockgroup', 'state_place',
    )),
    ('pl', (
        'us', 'state', 'state_county', 'state_county_subdivision',
        'state_county_tract', 'state_county_blockgroup',
        'state_place', 'state_congressional_district',
        'state_legislative_district_upper',
        'state_legislative_district_lower',
    ))
)

TEST_DATA = {
    'state_fips': '24',
    'county_fips': '031',
    'subdiv_fips': '90796',
    'tract': '700706',
    'blockgroup': '1',
    'place': '31175',
    'district': '06',       # for old `state_district` calling.
    'congressional_district': '06',
    'legislative_district': '06',
    'zcta': '20877',
    'msa': '47900',
    'csa': '548',
}


class CensusTestCase(unittest.TestCase):

    def setUp(self):
        self._client = Census(KEY)

    def client(self, name):
        return getattr(self._client, name)

    def tearDown(self):
        self._client.session.close()


class TestUnsupportedYears(CensusTestCase):

    def setUp(self):
        self._client = Census(KEY, year=2008)

    def test_acs5(self):
        client = self.client('acs5')
        self.assertRaises(UnsupportedYearException,
                          client.state, ('NAME', '06'))

    def test_acs5st(self):
        client = self.client('acs5st')
        self.assertRaises(UnsupportedYearException,
                          client.state, ('NAME', '06'))

    def test_acs1dp(self):
        client = self.client('acs1dp')
        self.assertRaises(UnsupportedYearException,
                          client.state, ('NAME', '06'))

    def test_sf1(self):
        client = self.client('sf1')
        self.assertRaises(UnsupportedYearException,
                          client.state, ('NAME', '06'))

    def test_pl(self):
        client = self.client('sf1')
        self.assertRaises(UnsupportedYearException,
                          client.state, ('NAME', '06'))


class TestEncoding(CensusTestCase):
    """
    Test character encodings of results are properly handled.
    """

    def test_la_canada_2015(self):
        """
        The 'La Cañada Flintridge city, California' place can be a problem.
        """
        geo = {
            'for': 'place:39003',
            'in': u'state:06'
        }
        self.assertEqual(
            self._client.acs5.get('NAME', geo=geo)[0]['NAME'],
            u'La Cañada Flintridge city, California'
        )
        self.assertEqual(
            self._client.acs.get('NAME', geo=geo, year=2016)[0]['NAME'],
            'La Cañada Flintridge city, California'
        )
        # 2015 is returned as:
        # 'La Ca\xf1ada Flintridge city, California'
        self.assertEqual(
            self._client.acs.get('NAME', geo=geo, year=2015)[0]['NAME'],
            'La Cañada Flintridge city, California'
        )


class TestEndpoints(CensusTestCase):

    def check_endpoints(self, client_name, tests, **kwargs):

        client = self.client(client_name)
        fields = ('NAME',)

        for method_name, expected in tests:

            msg = '{}.{}'.format(client_name, method_name)

            method = getattr(client, method_name)
            data = method(fields, **TEST_DATA, **kwargs)
            self.assertTrue(data, msg)
            self.assertEqual(data[0]['NAME'], expected, msg)
            time.sleep(0.2)

    def test_tables(self):
        self.client('acs5').tables()
        self.client('acs5').tables(2010)
        self.client('sf1').tables()
        self.client('pl').tables()

    def test_acs5(self):

        tests = (
            ('us', 'United States'),
            ('state', 'Maryland'),
            ('state_county', 'Montgomery County, Maryland'),
            ('state_county_subdivision',
                'District 9, Montgomery County, Maryland'),
            ('state_county_tract',
                'Census Tract 7007.06; Montgomery County; Maryland'),
            ('state_county_blockgroup',
                ('Block Group 1; Census Tract 7007.06; '
                    'Montgomery County; Maryland')),
            ('state_place', 'Gaithersburg city, Maryland'),
            ('state_district',
                'Congressional District 6 (119th Congress), Maryland'),
            ('state_congressional_district',
                'Congressional District 6 (119th Congress), Maryland'),
            ('state_legislative_district_upper',
                'State Senate District 6 (2024); Maryland'),
            ('state_legislative_district_lower',
                'State Legislative District 6 (2024); Maryland'),
            ('state_zipcode', 'ZCTA5 20877'),
        )

        self.check_endpoints('acs5', tests)

    def test_acs5_previous_years(self):

        tests = (
            ('us', 'United States'),
            ('state', 'Maryland'),
            ('state_county', 'Montgomery County, Maryland'),
            ('state_county_subdivision',
                'District 9, Montgomery County, Maryland'),
            ('state_county_tract',
                'Census Tract 7007.06, Montgomery County, Maryland'),
            ('state_county_blockgroup',
                ('Block Group 1, Census Tract 7007.06, '
                    'Montgomery County, Maryland')),
            ('state_place', 'Gaithersburg city, Maryland'),
            ('state_district',
                'Congressional District 6 (116th Congress), Maryland'),
            ('state_congressional_district',
                'Congressional District 6 (116th Congress), Maryland'),
            ('state_legislative_district_upper',
                'State Senate District 6 (2018), Maryland'),
            ('state_legislative_district_lower',
                'State Legislative District 6 (2018), Maryland'),
            ('state_zipcode', 'ZCTA5 20877'),
        )

        self.check_endpoints('acs5', tests, year=2019)

    def test_acs5st(self):

        tests = (
            ('us', 'United States'),
            ('state', 'Maryland'),
            ('state_congressional_district',
                'Congressional District 6 (119th Congress), Maryland'),
        )

        self.check_endpoints('acs5st', tests)

    def test_acs1dp(self):

        tests = (
            ('us', 'United States'),
            ('state', 'Maryland'),
            ('state_congressional_district',
                'Congressional District 6 (119th Congress), Maryland'),
        )

        self.check_endpoints('acs1dp', tests)

    def test_sf1(self):

        tests = (
            ('state', 'Maryland'),
            ('state_county', 'Montgomery County, Maryland'),
            ('state_county_subdivision',
                ('District 9, Montgomery County, Maryland')),
            ('state_county_tract',
                'Census Tract 7007.06, Montgomery County, Maryland'),
            ('state_county_blockgroup',
                ('Block Group 1, Census Tract 7007.06, '
                 'Montgomery County, Maryland')),
            ('state_place', 'Gaithersburg city, Maryland'),
            ('state_congressional_district',
                'Congressional District 6 (111th Congress), Maryland'),
            ('state_msa',
                ('Washington-Arlington-Alexandria, '
                    'DC-VA-MD-WV Metro Area (part); Maryland')),
            ('state_csa',
                ('Washington-Baltimore-Northern Virginia, '
                    'DC-MD-VA-WV CSA (part); Maryland')),
            # ('state_district_place', 'District 9'),
            ('state_zipcode', 'ZCTA5 20877, Maryland'),
        )

        self.check_endpoints('sf1', tests)

    def test_pl(self):

        tests = (
            ('us', 'United States'),
            ('state', 'Maryland'),
            ('state_county', 'Montgomery County, Maryland'),
            ('state_county_subdivision',
                'District 9, Montgomery County, Maryland'),
            ('state_county_tract',
                'Census Tract 7007.06, Montgomery County, Maryland'),
            ('state_county_blockgroup',
                ('Block Group 1, Census Tract 7007.06, '
                    'Montgomery County, Maryland')),
            ('state_place', 'Gaithersburg city, Maryland'),
            ('state_district',
                'Congressional District 6 (116th Congress), Maryland'),
            ('state_congressional_district',
                'Congressional District 6 (116th Congress), Maryland'),
            ('state_legislative_district_upper',
                'State Senate District 6 (2018), Maryland'),
            ('state_legislative_district_lower',
                'State Legislative District 6 (2018), Maryland'),
        )

        self.check_endpoints('pl', tests)

    def test_more_than_50(self):
        fields = ['B01001_003E', 'B01001_004E', 'B01001_005E',
                  'B01001_006E', 'B01001_007E', 'B01001_008E',
                  'B01001_009E', 'B01001_010E', 'B01001_011E',
                  'B01001_012E', 'B01001_013E', 'B01001_014E',
                  'B01001_015E', 'B01001_016E', 'B01001_017E',
                  'B01001_018E', 'B01001_019E', 'B01001_020E',
                  'B01001_021E', 'B01001_022E', 'B01001_023E',
                  'B01001_024E', 'B01001_025E', 'B01001_027E',
                  'B01001_028E', 'B01001_029E', 'B01001_030E',
                  'B01001_031E', 'B01001_032E', 'B01001_033E',
                  'B01001_034E', 'B01001_035E', 'B01001_036E',
                  'B01001_037E', 'B01001_038E', 'B01001_039E',
                  'B01001_040E', 'B01001_041E', 'B01001_042E',
                  'B01001_043E', 'B01001_044E', 'B01001_045E',
                  'B01001_046E', 'B01001_047E', 'B01001_048E',
                  'B01001_049E', 'B19001_003E', 'B19001_004E',
                  'B19001_005E', 'B19001_006E', 'B19001_007E',
                  'B19001_008E', 'B19001_009E', 'B19001_010E',
                  'B19001_011E', 'B19001_012E', 'B19001_013E',
                  'B19001_014E', 'B19001_015E', 'B19001_016E']

        client = self.client('acs5')
        results = client.us(fields)
        assert set(results[0].keys()).issuperset(fields)
    
    def test_more_than_50_not_out_of_order(self):
        fields = ['GEO_ID', 'B01001_001E', 'B01001_003E',
                  'B01001_006E', 'B01001_007E', 'B01001_008E',
                  'B01001_009E', 'B01001_010E', 'B01001_011E',
                  'B01001_012E', 'B01001_013E', 'B01001_014E',
                  'B01001_015E', 'B01001_016E', 'B01001_017E',
                  'B01001_018E', 'B01001_019E', 'B01001_020E',
                  'B01001_021E', 'B01001_022E', 'B01001_023E',
                  'B01001_024E', 'B01001_025E', 'B01001_027E',
                  'B01001_028E', 'B01001_029E', 'B01001_030E',
                  'B01001_031E', 'B01001_032E', 'B01001_033E',
                  'B01001_034E', 'B01001_035E', 'B01001_036E',
                  'B01001_037E', 'B01001_038E', 'B01001_039E',
                  'B01001_040E', 'B01001_041E', 'B01001_042E',
                  'B01001_043E', 'B01001_044E', 'B01001_045E',
                  'B01001_046E', 'B01001_047E', 'B01001_048E',
                  'B01001_049E', 'B19001_003E', 'B19001_004E',
                  'B19001_005E', 'B19001_006E', 'B19001_007E',
                  'B19001_008E', 'B19001_009E', 'B19001_010E',
                  'B19001_011E', 'B19001_012E', 'B19001_013E',
                  'B19001_014E', 'B19001_015E', 'B19001_016E',
                  'B03002_001E', ]

        client = self.client('acs1')
        results = client.state_county(fields, '*', '*', year=2018)
        # We know that the last 5 digits of the GEO_ID are the FIPS code
        # GEO_ID is grabbed in the first chunk (request), but the state and county are overwritten
        # with each chunk and have the values from the last chunk
        assert results[0]['GEO_ID'][-5:] == results[0]['state'] + \
            results[0]['county']

    def test_new_style_endpoints(self):
        client = Census(KEY, year=2016)

        res_2016_2016 = client.acs1.state('B01001_004E', Census.ALL)
        res_2016_2014 = client.acs1.state('B01001_004E', Census.ALL, year=2014)

        client = Census(KEY, year=2014)

        res_2014_2014 = client.acs1.state('B01001_004E', Census.ALL)
        res_2014_2016 = client.acs1.state('B01001_004E', Census.ALL, year=2016)

        assert sorted(
            res_2016_2016, key=lambda x: x['state']
        ) == sorted(
            res_2014_2016, key=lambda x: x['state']
        )

        assert sorted(
            res_2014_2014, key=lambda x: x['state']
        ) == sorted(
            res_2016_2014, key=lambda x: x['state']
        )

    def test_older_sf1(self):
        client = Census(KEY)
        result_2010 = client.sf1.get(
            ('P008001',  # total
             'P008003',  # white
             'P008004',  # black
             'P008006',  # asian
             'P008010',  # latino
             ),
            geo={'for': 'place:53476', 'in': 'state:06'},
        )
        result_2000 = client.sf1.get(
            ('P008001',  # total
             'P008003',  # white
             'P008004',  # black
             'P008006',  # asian
             'P008010',  # latino
             ),
            geo={'for': 'place:53476', 'in': 'state:06'},
            year=2000,
        )
        assert result_2010 != result_2000


if __name__ == '__main__':
    unittest.main()


================================================
FILE: pyproject.toml
================================================
[build-system]
requires = ["setuptools >= 40.6.0", "wheel"]
build-backend = "setuptools.build_meta"

================================================
FILE: requirements.txt
================================================
requests>=2.31
pytest>=2.9


================================================
FILE: setup.cfg
================================================
[metadata]
name = census
version = 0.8.26
author = Jeremy Carbaugh
author_email = jcarbaugh@sunlightfoundation.com
maintainer = Forest Gregg
maintainer_email = fgregg@gmail.com
license = BSD
description = A wrapper for the US Census Bureau's API
url = http://github.com/datamade/census
long_description = file: README.rst
classifiers =
    Development Status :: 4 - Beta
    Intended Audience :: Developers
    License :: OSI Approved :: BSD License
    Natural Language :: English
    Operating System :: OS Independent
    Programming Language :: Python
platforms = any

[options]
py_modules = census
packages = find:
install_requires =
    requests>=1.1.0


================================================
FILE: setup.py
================================================
from setuptools import setup

setup()