The Maxwell School
Syracuse University
Syracuse University
""" append.py Spring 2022 PJW Stack Census block group data state by state into a large dataframe. """ import pandas as pd import zipfile # # Open the zip archive of block group data # archive = zipfile.ZipFile('bg_by_state.zip') #%% # # Read the files in the zip archive one by one and append them to a # new dataframe. # combined = pd.DataFrame() fips = {'state':str,'county':str, 'block group':str, 'tract':str} n_files = 0 for f in archive.namelist(): fh = archive.open(f) cur = pd.read_csv(fh,dtype=fips) combined = pd.concat( [combined, cur] ) n_files += 1 n_rec = len(combined) print( f'read: {n_files} files, {n_rec} records') #%% # # Build a blockgroup FIPS code and drop the other pieces. # combined['bg'] = combined['tract']+combined['block group'] combined = combined.drop(columns=['tract','block group']) #%% # # Rename the race columns and then calculate the difference # varmap = { "B02001_001E":"total", "B02001_002E":"white" } combined = combined.rename(columns=varmap) combined['nonwhite'] = combined['total'] - combined['white'] #%% # # Save the result # combined.to_csv('append.csv',index=False)