Computational Exercises: Solutions > Computing County Dissimilarity Indexes (g16)

append.py

"""
append.py
Spring 2022 PJW

Stack Census block group data state by state into a large dataframe.
"""

import pandas as pd
import zipfile

#
#  Open the zip archive of block group data
#

#  Column types for the geographic identifiers. They are read as strings
#  so pandas does not parse them as numbers, which would strip any
#  leading zeros from the codes.

fips = {'state': str, 'county': str, 'block group': str, 'tract': str}

#%%
#
#  Read the files in the zip archive one by one, collect them in a list,
#  and stack them into a new dataframe with a single concat call.
#
#  Concatenating once at the end avoids re-copying all previously read
#  rows on every iteration, and the context managers make sure both the
#  archive and each member file handle are closed, even if a read fails.
#

frames = []

with zipfile.ZipFile('bg_by_state.zip') as archive:
    for member in archive.namelist():
        with archive.open(member) as fh:
            frames.append(pd.read_csv(fh, dtype=fips))

n_files = len(frames)

#  pd.concat raises ValueError on an empty list, so fall back to an empty
#  dataframe when the archive has no members. ignore_index gives the
#  stacked data a unique 0..n-1 index instead of repeating each file's
#  own row labels.

if frames:
    combined = pd.concat(frames, ignore_index=True)
else:
    combined = pd.DataFrame()

n_rec = len(combined)
print( f'read: {n_files} files, {n_rec} records')

#%%
#
#  Build a block group code by concatenating the tract and block group
#  strings, then drop those two component columns.
#

#  Add the 'bg' column (tract string followed by block group string) and
#  remove the two source columns in a single chained expression.

combined = (
    combined
    .assign(bg=lambda df: df['tract'] + df['block group'])
    .drop(columns=['tract', 'block group'])
)

#%%
#
#  Rename the race columns and then compute the nonwhite count as the
#  total minus the white count
#

#  Map from the original variable codes to readable column names.
varmap = {"B02001_001E": "total", "B02001_002E": "white"}

combined = combined.rename(columns=varmap)

#  Nonwhite is whatever part of the total is not counted as white.
total_count = combined['total']
white_count = combined['white']
combined['nonwhite'] = total_count.sub(white_count)

#%%
#
#  Save the result
#

#  Write the stacked data to disk, leaving the dataframe index out of
#  the file.
output_file = 'append.csv'
combined.to_csv(output_file, index=False)

Site Index | Zoom | Admin
URL: https://cleanenergyfutures.insightworks.com/pages/7442.html
Peter J Wilcoxen, The Maxwell School, Syracuse University
Revised 03/27/2022