Computational Exercises: Solutions > Using the Census API (g19)

analyze.py

"""
analyze.py
Spring 2021 PJW

Aggregate and analyze the educational attainment data.
"""

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

#
#  Configure display defaults: high-resolution figures for matplotlib,
#  the plain white seaborn theme, and no row limit when pandas prints
#

plt.rcParams.update({'figure.dpi': 300})
sns.set_theme(style="white")

pd.options.display.max_rows = None

#
#  Load the table describing each Census variable, indexed by the
#  variable name, and pull out the column giving each variable's group
#

var_info = pd.read_csv(
    'census-variables.csv',
    index_col='variable',
)
var_group = var_info['group']

print("\nGroups:", var_group, sep="\n")

#
#  Load the attainment data itself, indexed by the NAME column
#

attain = pd.read_csv(
    'census-data.csv',
    index_col='NAME',
)

print("\nAttainment:", attain, sep="\n")

#
#  Aggregate the columns using the grouping information.
#
#  The data is transposed so its columns become rows, grouped using
#  var_group (aligned on var_group's index), summed, and transposed
#  back so the result again has one row per original row and one
#  column per group. This replaces groupby(..., axis='columns'), which
#  has been deprecated since pandas 2.1. sort=False keeps the groups
#  in the order they are first encountered.
#
#  Note: group_by_level is now a groupby over the transposed data.
#

group_by_level = attain.T.groupby( var_group, sort=False )
by_level = group_by_level.sum().T

#
#  Split off the column of totals, then verify that the remaining
#  columns add up to it (the difference is printed for inspection)
#

total = by_level.loc[:, 'total']
by_level = by_level.drop('total', axis='columns')

row_sums = by_level.sum(axis='columns')
error = row_sums - total

print("\nCheck sum:")
print(error)

#
#  Express each group as a percentage of the row's total
#

share = by_level.div(total, axis='index')
pct = 100 * share

#
#  Print a sorted list of counties ordered by percent of
#  the population with less than a HS education
#

print( "\nPercent 
Site Index | Zoom | Admin
URL: https://cleanenergyfutures.insightworks.com/pages/7577.html
Peter J Wilcoxen, The Maxwell School, Syracuse University
Revised 04/26/2022