The Maxwell School
Syracuse University
Syracuse University
"""
analyze.py
Spring 2021 PJW

Aggregate and analyze the educational attainment data.

Reads 'census-variables.csv' (maps each Census variable to a group) and
'census-data.csv' (one row per NAME, one column per variable), sums the
variables within each group, checks the groups against the 'total' group,
and converts the remaining groups to percentages of that total.
"""

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

#
#  Set up defaults for matplotlib, seaborn and pandas
#

plt.rcParams['figure.dpi'] = 300
sns.set_theme(style="white")
pd.set_option('display.max_rows', None)

#
#  Read information about how to group the variables
#

var_info = pd.read_csv('census-variables.csv', index_col='variable')
var_group = var_info['group']

print( "\nGroups:" )
print(var_group)

#
#  Read the actual data
#

attain = pd.read_csv('census-data.csv', index_col='NAME')

print( "\nAttainment:")
print( attain )

#
#  Aggregate the columns using the grouping information
#
#  DataFrame.groupby(..., axis='columns') is deprecated since pandas 2.1
#  and is not accepted by newer releases. The portable equivalent is to
#  transpose so the variables become rows, group those rows by var_group,
#  sum, and transpose back so each row is again one NAME and each column
#  is one group. sort=False keeps the groups in order of first appearance.
#
#  NOTE(review): transposing a frame with mixed column dtypes upcasts the
#  values; this assumes every column of census-data.csv is a numeric
#  count -- confirm against the file.
#

group_by_level = attain.T.groupby( var_group, sort=False )
by_level = group_by_level.sum().T

#
#  Extract the column of totals and make sure remaining data adds up
#

total = by_level['total']
by_level = by_level.drop(columns='total')

# Should be zero in every row if the groups partition the total.
error = by_level.sum(axis='columns') - total

print( "\nCheck sum:" )
print(error)

#
#  Build a dataframe with percentages of the total
#

# axis='index' aligns `total` with the rows, so each row is divided by
# its own total.
pct = 100*by_level.div(total, axis='index')

#
#  Print a sorted list of counties ordered by percent of
#  the population with less than a HS education
#

# NOTE(review): the visible source is cut off mid-statement at this point.
# The fragment is preserved verbatim here rather than guessed at:
#     print( "\nPercent