From fiedlerm@nber.org Wed Apr 14 15:21:45 2010 Date: Wed, 14 Apr 2010 14:21:29 -0400 From: Matthew Fiedler To: Jean Roth Subject: 1993 POS File Jean, As I mentioned in my last email, I've been doing some work with the CMS Provider of Services (POS) files for Amitabh. In the course of that work, I discovered that the SAS input statement that CMS provided for the 1993 POS file does not match the record layout provided for that year's file. The mismatch affects the sizable majority of the fields in the file, but two important examples are PROV1680 (the hospital provider number) and PROV2905 (the hospital zipcode). From looking at the file, I have concluded that the record layout is correct, while the SAS input statement is wrong. (I cannot make a statement one way or another about the COBOL input statement, as I didn't need it.) Anyway, in order to get the files into SAS, I wrote a Python script that parses the record layout and generates a correct SAS input statement. Since it seems like this might be useful to others working with these files, I thought I would pass it along to you to do with as you would like. The new SAS input statement as well as the Python script that generates it are attached to this email. Thanks much, Matt PS In case it is not clear from this email, the other years' POS files do not have this problem. [ Part 2: "Attached Text" ] import re import csv ## LIST OF FILE LAYOUTS ## fileList = [ (1991,"/homes/data/pos/1991-04/poslayout91.txt"), \ (1992,"/homes/data/pos/1992-04/poslayout92.txt"), \ (1993,"/homes/data/pos/1993-04/poslayout93.txt"), \ (1994,"/homes/data/pos/1994-04/poslayout94.txt"), \ (1995,"/homes/data/pos/1995-04/poslayout95.txt"), \ (1996,"/homes/data/pos/1996-04/poslayout96.txt"), \ (1997,"/homes/data/pos/1997-04/poslayout97.txt"), \ (1998,"/homes/data/pos/1998-04/poslayout98.txt"), \ (1999,"/homes/data/pos/1999-04/poslayout99.txt"), \ (2000,"/homes/data/pos/2000-04/poslayout00.txt"), \ (2001,"/homes/data/pos/2001/poslayout01.txt"), \ (2002,"/homes/data/pos/2002/posreclout.txt"), \ (2003,"/homes/data/pos/2003/posrlout.dec03.txt"), \ (2004,"/homes/data/pos/2004/pos404rlout.dec04.txt"), \ (2005,"/homes/data/pos/2005/rlout.dec05.txt"), \ (2006,"/homes/data/pos/2006/Record Layout.txt"), \ (2007,"/homes/data/pos/2007-04/rlout.dec07.txt"), \ (2008,"/homes/data/pos/2008/RLOUT_DEC08.txt") ] ## PARSE FILE LAYOUTS ## outDat = [] for fileyears in fileList: # Open the file and initalize the dictionary used to hold variable characteristics inFile = open(fileyears[1], "r") curLine = inFile.readline() tempDict = {} # Loop over the record layout and find each line that defines # the name and location of a variable. Since many variable definitions # are repeated once in each section of the record layout, I use a dictionary # to de-dup the list of lines. while curLine != "": # Seach for variable names matchLine = re.search("SSAMSASZ|SSAMSACD|SSAMSA|FIPCNTY|FIPSTATE|PROV[0-9]{4}",curLine) if matchLine != None: tempDict[curLine] = curLine curLine = inFile.readline() inFile.close() # Extract the final de-duped list of variable definition lines templist = tempDict.keys() # Process each of these lines to get the information on where to look for each variable # into a useable form. outlist = [] for line in templist: # Pulls line elements in sequence: 1) item length; 2) start character; 3) end character; # 4) character/numeric indicator; 5) variable name. temptuple = (line[47:53].strip(), line[53:59].strip(), line[59:64].strip(), line[64:65].strip(), line[69:len(line)].strip()) outlist.append(temptuple) # Sort by the starting character of the variable outlist.sort(key=lambda a: int(a[2])) # Open the file on which to write the new SAS input statement outfile = open("/homes/nber/fiedlerm/MedicareClaims/DiffDist/POSManifests/possas_new"+str(fileyears[0])+".sas","w") outfile.write("INPUT\n") # Loop over the variables and write the appropriate input statement for each one for myTup in outlist: outline = "\t@" + myTup[1] + "\t" + myTup[4] + "\t" # Character, decimal, and integer fields all need to be handled differently if myTup[3] == "C": outline += "$CHAR" + myTup[0] + ".\n" elif myTup[0].find(".") != -1: outline += myTup[0] + "\n" else: outline += myTup[0] + ".\n" outfile.write(outline) # Finish the file and close it out outfile.write(";\n") outfile.close() [ Part 3: "Attached Text" ] INPUT @1 PROV0085 $CHAR2. @3 PROV0075 $CHAR2. @5 PROV0095 2. @7 PROV0100 $CHAR6. @13 PROV3225 $CHAR28. @41 PROV0220 $CHAR1. @42 PROV2715 $CHAR1. @43 PROV2695 $CHAR3. @46 PROV0300 $CHAR10. @56 PROV0500 $CHAR6. @62 PROV2740 $CHAR6. @68 PROV0455 $CHAR1. @69 PROV0475 $CHAR38. @107 PROV0605 $CHAR5. @112 PROV0655 $CHAR12. @124 PROV1565 $CHAR6. @130 PROV1615 $CHAR6. @136 PROV1620 $CHAR5. @141 PROV1680 $CHAR10. @151 PROV1720 $CHAR1. @152 PROV1725 $CHAR2. @154 PROV2045 $CHAR1. @155 PROV3230 $CHAR2. @157 PROV2700 $CHAR2. @159 PROV2710 $CHAR3. @162 PROV2720 $CHAR38. @200 PROV1605 $CHAR10. @210 PROV2805 $CHAR1. @211 PROV2810 $CHAR6. @217 PROV2880 $CHAR1. @218 PROV2885 $CHAR2. @220 PROV2905 $CHAR5. @225 FIPSTATE $CHAR2. @227 FIPCNTY $CHAR3. @230 SSAMSA $CHAR3. @233 SSAMSASZ $CHAR1. @234 PROV0000 $CHAR6. @240 PROV0005 $CHAR6. @246 PROV0010 $CHAR1. @247 PROV0040 3. @250 PROV0045 $CHAR6. @256 PROV0050 $CHAR1. @257 PROV0055 $CHAR1. @258 PROV0060 $CHAR6. @264 PROV0740 5. @269 PROV0755 5. @274 PROV0760 7.2 @281 PROV0130 $CHAR10. @291 PROV0135 $CHAR10. @301 PROV0140 $CHAR10. @311 PROV0145 $CHAR10. @321 PROV0150 $CHAR10. @331 PROV0240 $CHAR1. @332 PROV0280 $CHAR1. @333 PROV0285 $CHAR1. @334 PROV0290 $CHAR1. @335 PROV3545 $CHAR1. @336 PROV3555 $CHAR1. @337 PROV3550 $CHAR1. @338 PROV0450 $CHAR6. @344 PROV0820 7.2 @351 PROV0485 $CHAR4. @355 PROV0950 7.2 @362 PROV0955 7.2 @369 PROV0645 $CHAR1. @370 PROV0670 $CHAR1. @371 PROV1050 7.2 @378 PROV1075 7.2 @385 PROV1575 $CHAR1. @386 PROV1125 7.2 @393 PROV1115 7.2 @400 PROV1520 $CHAR6. @406 PROV1670 $CHAR1. @407 PROV1690 3. @410 PROV1695 $CHAR6. @416 PROV1700 $CHAR1. @417 PROV1705 $CHAR1. @418 PROV1710 $CHAR6. @424 PROV1545 $CHAR1. @425 PROV1550 $CHAR1. @426 PROV1555 $CHAR1. @427 PROV1145 7.2 @434 PROV1100 7.2 @441 PROV1730 3. @444 PROV1735 $CHAR6. @450 PROV1740 $CHAR1. @451 PROV1745 $CHAR1. @452 PROV1750 $CHAR6. @458 PROV1755 $CHAR10. @468 PROV1805 $CHAR1. @469 PROV1810 $CHAR1. @470 PROV1815 $CHAR1. @471 PROV1820 $CHAR1. @472 PROV1165 7.2 @479 PROV2040 $CHAR1. @480 PROV2055 $CHAR1. @481 PROV2065 $CHAR1. @482 PROV2070 $CHAR1. @483 PROV2080 $CHAR1. @484 PROV2090 $CHAR1. @485 PROV2100 $CHAR1. @486 PROV2110 $CHAR1. @487 PROV2120 $CHAR1. @488 PROV2130 $CHAR1. @489 PROV2140 $CHAR1. @490 PROV2160 $CHAR1. @491 PROV2175 $CHAR1. @492 PROV2190 $CHAR1. @493 PROV2185 $CHAR1. @494 PROV2205 $CHAR1. @495 PROV2210 $CHAR1. @496 PROV2215 $CHAR1. @497 PROV2235 $CHAR1. @498 PROV2245 $CHAR1. @499 PROV2265 $CHAR1. @500 PROV2270 $CHAR1. @501 PROV2285 $CHAR1. @502 PROV2300 $CHAR1. @503 PROV2295 $CHAR1. @504 PROV2310 $CHAR1. @505 PROV2315 $CHAR1. @506 PROV2350 $CHAR1. @507 PROV2355 $CHAR1. @508 PROV2360 $CHAR1. @509 PROV2365 $CHAR1. @510 PROV2370 $CHAR1. @511 PROV2410 $CHAR1. @512 PROV2415 $CHAR1. @513 PROV2440 $CHAR1. @514 PROV2445 $CHAR1. @515 PROV2450 $CHAR1. @516 PROV2470 $CHAR1. @517 PROV2475 $CHAR1. @518 PROV2485 $CHAR1. @519 PROV2505 $CHAR1. @520 PROV2795 $CHAR1. @521 PROV2800 $CHAR1. @522 PROV2890 $CHAR2. @524 PROV0690 $CHAR1. @525 PROV0695 7.2 @532 PROV0700 7.2 @539 PROV0705 7.2 @546 PROV0710 7.2 @553 PROV0715 7.2 @560 PROV0720 7.2 @567 PROV0030 $CHAR6. @573 PROV1000 7.2 @580 PROV1005 7.2 @587 PROV1010 7.2 @594 PROV1445 4. @598 PROV1455 4. @602 PROV1450 4. @606 PROV0110 $CHAR1. @607 PROV0225 $CHAR1. @608 PROV0270 $CHAR1. @609 PROV0295 $CHAR1. @610 PROV0785 7.2 @617 PROV0790 7.2 @624 PROV0795 7.2 @631 PROV0805 7.2 @638 PROV0810 7.2 @645 PROV0815 7.2 @652 PROV0465 $CHAR1. @653 PROV0860 7.2 @660 PROV0865 7.2 @667 PROV0870 7.2 @674 PROV0925 7.2 @681 PROV0930 7.2 @688 PROV0935 7.2 @695 PROV1465 7.2 @702 PROV1470 7.2 @709 PROV1475 7.2 @716 PROV0640 $CHAR6. @722 PROV0960 7.2 @729 PROV0965 7.2 @736 PROV0970 7.2 @743 PROV0980 7.2 @750 PROV0985 7.2 @757 PROV0990 7.2 @764 PROV0680 $CHAR38. @802 PROV0675 $CHAR1. @803 PROV1035 7.2 @810 PROV1040 7.2 @817 PROV1045 7.2 @824 PROV1020 7.2 @831 PROV1025 7.2 @838 PROV1030 7.2 @845 PROV1535 $CHAR1. @846 PROV1540 $CHAR1. @847 PROV3265 7.2 @854 PROV3245 7.2 @861 PROV3255 7.2 @868 PROV1060 7.2 @875 PROV1065 7.2 @882 PROV1070 7.2 @889 PROV1085 7.2 @896 PROV1090 7.2 @903 PROV1095 7.2 @910 PROV1430 7.2 @917 PROV1435 7.2 @924 PROV1440 7.2 @931 PROV1415 7.2 @938 PROV1420 7.2 @945 PROV1425 7.2 @952 PROV3270 7.2 @959 PROV3250 7.2 @966 PROV3260 7.2 @973 PROV1130 7.2 @980 PROV1135 7.2 @987 PROV1140 7.2 @994 PROV1610 $CHAR6. @1000 PROV1640 $CHAR6. @1006 PROV1675 $CHAR1. @1007 PROV1150 7.2 @1014 PROV1155 7.2 @1021 PROV1160 7.2 @1028 PROV1825 $CHAR6. @1034 PROV1170 7.2 @1041 PROV1175 7.2 @1048 PROV1180 7.2 @1055 PROV0725 3. @1058 PROV0730 3. @1061 PROV0800 3. @1064 PROV0855 3. @1067 PROV0905 3. @1070 PROV0920 3. @1073 PROV0940 3. @1076 PROV1205 3. @1079 PROV1460 3. @1082 PROV1190 7.2 @1089 PROV1195 7.2 @1096 PROV1200 7.2 @1103 PROV3390 $CHAR1. @1104 PROV3385 $CHAR1. @1105 PROV3380 $CHAR1. @1106 PROV3525 $CHAR1. @1107 PROV3520 $CHAR1. @1108 PROV3515 $CHAR1. @1109 PROV3495 $CHAR1. @1110 PROV3490 $CHAR1. @1111 PROV3485 $CHAR1. @1112 PROV3435 $CHAR1. @1113 PROV3425 $CHAR1. @1114 PROV3345 $CHAR1. @1115 PROV3340 $CHAR1. @1116 PROV3335 $CHAR1. @1117 PROV3535 $CHAR1. @1118 PROV3540 $CHAR1. @1119 PROV3530 $CHAR1. @1120 PROV3465 $CHAR1. @1121 PROV3460 $CHAR1. @1122 PROV3455 $CHAR1. @1123 PROV3315 $CHAR1. @1124 PROV3310 $CHAR1. @1125 PROV3305 $CHAR1. @1126 PROV3360 $CHAR1. @1127 PROV3355 $CHAR1. @1128 PROV3350 $CHAR1. @1129 PROV3330 $CHAR1. @1130 PROV3325 $CHAR1. @1131 PROV3320 $CHAR1. @1132 PROV3300 $CHAR1. @1133 PROV3295 $CHAR1. @1134 PROV3290 $CHAR1. @1135 PROV3375 $CHAR1. @1136 PROV3370 $CHAR1. @1137 PROV3365 $CHAR1. @1138 PROV3285 $CHAR1. @1139 PROV3280 $CHAR1. @1140 PROV3275 $CHAR1. @1141 PROV3450 $CHAR1. @1142 PROV3445 $CHAR1. @1143 PROV3440 $CHAR1. @1144 PROV3405 $CHAR1. @1145 PROV3400 $CHAR1. @1146 PROV3395 $CHAR1. @1147 PROV3420 $CHAR1. @1148 PROV3415 $CHAR1. @1149 PROV3410 $CHAR1. @1150 PROV3480 $CHAR1. @1151 PROV3475 $CHAR1. @1152 PROV3470 $CHAR1. @1153 PROV3510 $CHAR1. @1154 PROV3505 $CHAR1. @1155 PROV0555 $CHAR1. @1156 PROV1525 $CHAR1. @1157 PROV0745 2. @1159 PROV0105 $CHAR1. @1160 PROV0560 $CHAR1. @1161 PROV0910 7.2 @1168 PROV0665 $CHAR1. @1169 PROV0570 $CHAR6. @1175 PROV0650 $CHAR6. @1181 PROV1120 7.2 @1188 PROV1185 7.2 @1195 PROV1220 7.2 @1202 PROV2075 $CHAR1. @1203 PROV2155 $CHAR1. @1204 PROV2195 $CHAR1. @1205 PROV2220 $CHAR1. @1206 PROV2250 $CHAR1. @1207 PROV2255 $CHAR1. @1208 PROV2340 $CHAR1. @1209 PROV2520 $CHAR1. @1210 PROV2535 $CHAR1. @1211 PROV2725 $CHAR1. @1212 PROV1530 $CHAR1. @1213 PROV1240 2. @1215 PROV2615 $CHAR2. @1217 PROV0615 $CHAR1. @1218 PROV0775 3. @1221 PROV0765 3. @1224 PROV0770 3. @1227 PROV0880 3. @1230 PROV0885 3. @1233 PROV0895 3. @1236 PROV0900 3. @1239 PROV0875 3. @1242 PROV0890 3. @1245 PROV2085 $CHAR1. @1246 PROV0830 3. @1249 PROV0835 3. @1252 PROV0840 3. @1255 PROV0845 3. @1258 PROV0825 3. @1261 PROV0850 3. @1264 PROV1275 3. @1267 PROV1285 3. @1270 PROV1290 3. @1273 PROV1295 3. @1276 PROV1300 3. @1279 PROV1305 3. @1282 PROV1280 3. @1285 PROV1310 3. @1288 PROV1315 3. @1291 PROV1320 3. @1294 PROV1345 3. @1297 PROV1330 3. @1300 PROV1325 3. @1303 PROV1335 3. @1306 PROV1340 3. @1309 PROV1350 3. @1312 PROV1355 3. @1315 PROV1360 3. @1318 PROV1365 3. @1321 PROV1370 3. @1324 PROV1375 3. @1327 PROV1245 3. @1330 PROV1260 3. @1333 PROV1265 3. @1336 PROV1380 3. @1339 PROV1255 3. @1342 PROV1250 3. @1345 PROV1385 3. @1348 PROV1390 3. @1351 PROV1395 3. @1354 PROV1400 3. @1357 PROV1410 3. @1360 PROV1405 3. @1363 PROV1865 $CHAR1. @1364 PROV1870 $CHAR1. @1365 PROV1875 $CHAR1. @1366 PROV1880 $CHAR1. @1367 PROV1885 $CHAR1. @1368 PROV1890 $CHAR1. @1369 PROV1895 $CHAR1. @1370 PROV1900 $CHAR1. @1371 PROV1910 $CHAR1. @1372 PROV1915 $CHAR1. @1373 PROV1920 $CHAR1. @1374 PROV1925 $CHAR1. @1375 PROV1930 $CHAR1. @1376 PROV1935 $CHAR1. @1377 PROV1940 $CHAR1. @1378 PROV1945 $CHAR1. @1379 PROV1950 $CHAR1. @1380 PROV1955 $CHAR1. @1381 PROV1960 $CHAR1. @1382 PROV1965 $CHAR1. @1383 PROV1970 $CHAR1. @1384 PROV1975 $CHAR1. @1385 PROV1980 $CHAR1. @1386 PROV1985 $CHAR1. @1387 PROV1990 $CHAR1. @1388 PROV1995 $CHAR1. @1389 PROV2000 $CHAR1. @1390 PROV2005 $CHAR1. @1391 PROV2010 $CHAR1. @1392 PROV2015 $CHAR1. @1393 PROV2020 $CHAR1. @1394 PROV1715 $CHAR1. @1395 PROV0735 7.2 @1402 PROV0750 7.2 @1409 PROV1515 7.2 @1416 PROV1685 $CHAR1. @1417 PROV1105 7.2 @1424 PROV1215 7.2 @1431 PROV1210 7.2 @1438 PROV2500 $CHAR1. @1439 PROV0070 $CHAR1. @1440 PROV0090 $CHAR1. @1441 PROV0235 $CHAR1. @1442 PROV0250 $CHAR1. @1443 PROV0275 $CHAR1. @1444 PROV0685 $CHAR2. @1446 PROV0470 $CHAR1. @1447 PROV0265 $CHAR1. @1448 PROV1600 $CHAR1. @1449 PROV1590 $CHAR1. @1450 PROV1595 $CHAR6. @1456 PROV1760 $CHAR1. @1457 PROV1765 $CHAR6. @1463 PROV1580 $CHAR1. @1464 PROV1585 $CHAR6. @1470 PROV1770 $CHAR1. @1471 PROV1775 $CHAR6. @1477 PROV2030 $CHAR1. @1478 PROV2025 $CHAR1. @1479 PROV2035 $CHAR6. @1485 PROV2705 $CHAR1. @1486 PROV1230 3. @1489 PROV1235 3. @1492 PROV2855 3. @1495 PROV2870 $CHAR1. @1496 PROV0945 4. @1500 PROV0780 7.2 @1507 PROV0620 $CHAR6. @1513 PROV0625 $CHAR6. @1519 PROV0630 $CHAR6. @1525 PROV1630 $CHAR6. @1531 PROV1635 $CHAR6. @1537 PROV2850 9.2 @1546 PROV0480 $CHAR1. @1547 PROV1015 7.2 @1554 PROV1560 $CHAR10. @1564 PROV1110 7.2 @1571 PROV2845 $CHAR26. @1597 PROV1570 $CHAR1. @1598 PROV2275 $CHAR1. @1599 PROV2280 $CHAR1. @1600 PROV2325 $CHAR1. @1601 PROV2330 $CHAR1. @1602 PROV2335 $CHAR1. @1603 PROV2375 $CHAR1. @1604 PROV2380 $CHAR1. @1605 PROV2385 $CHAR1. @1606 PROV2390 $CHAR1. @1607 PROV2395 $CHAR1. @1608 PROV2420 $CHAR1. @1609 PROV2425 $CHAR1. @1610 PROV2430 $CHAR1. @1611 PROV2455 $CHAR1. @1612 PROV2460 $CHAR1. @1613 PROV2465 $CHAR1. @1614 PROV2490 $CHAR1. @1615 PROV2495 $CHAR1. @1616 PROV2510 $CHAR1. @1617 PROV2515 $CHAR1. @1618 PROV0415 $CHAR6. @1624 PROV0550 $CHAR1. @1625 PROV0565 $CHAR1. @1626 PROV1055 2. @1628 PROV2095 $CHAR1. @1629 PROV2145 $CHAR1. @1630 PROV2150 $CHAR1. @1631 PROV2240 $CHAR1. @1632 PROV2260 $CHAR1. @1633 PROV2290 $CHAR1. @1634 PROV2305 $CHAR1. @1635 PROV2320 $CHAR1. @1636 PROV2345 $CHAR1. @1637 PROV2400 $CHAR1. @1638 PROV2525 $CHAR1. @1639 PROV2530 $CHAR1. @1640 PROV2135 $CHAR1. @1641 PROV2200 $CHAR1. @1642 PROV2435 $CHAR1. @1643 PROV0015 $CHAR1. @1644 PROV1225 7.2 @1651 PROV1480 7.2 @1658 PROV1485 7.2 @1665 PROV0915 7.2 @1672 PROV1490 7.2 @1679 PROV1495 7.2 @1686 PROV0975 7.2 @1693 PROV1510 7.2 @1700 PROV1500 7.2 @1707 PROV1505 7.2 @1714 PROV2115 $CHAR1. @1715 PROV2165 $CHAR1. @1716 PROV2170 $CHAR1. @1717 PROV2225 $CHAR1. @1718 PROV2480 $CHAR1. @1719 PROV1080 7.2 @1726 PROV2860 9.2 @1735 PROV3710 $CHAR1. @1736 PROV3705 $CHAR6. @1742 PROV3700 $CHAR1. ;