Preprocess R data with Python

python

data

(for archival purposes only, probably don’t use this.)

Published

July 11, 2014

This script can easily be adapted to code, recode, or otherwise transform CSV files before loading them into R.

#  transform a raw data file into a happy proto data frame
#

import csv

# Verbs searched for in each item's sentence text (the 'ResponseID' cell).
verbs = [
    'mentioned',
    'indicated',
    'proposed',
    'suggested',
    'recommended',
    'confessed',
    'stated',
    'announced',
    'muttered',
    'explained',
]

in_name = 'transposed.csv'
out_name = 'reshaped.csv'


def _split_condition(code):
    """Split a 'V1' condition code into ``(shift, length)``.

    The first character must be 'N' or 'S' and becomes ``shift``; everything
    after it becomes ``length``.  Returns None for an empty code or any
    other prefix.
    """
    if code[:1] in ('N', 'S'):
        return code[0], code[1:]
    return None


def _find_verb(sentence):
    """Return the first entry of ``verbs`` contained in *sentence*, or None."""
    # we're in trouble if one of the sentences contains more than one verb
    # fortunately, they don't.
    for v in verbs:
        if v in sentence:
            return v
    return None


def reshape(in_path=in_name, out_path=out_name, n_subjects=192):
    """Reshape the wide per-item CSV at *in_path* into a long CSV at *out_path*.

    The input needs a header row containing the columns 'V1' (condition
    code), 'ResponseID' (text that is searched for one of ``verbs``) and
    'ID.1' .. 'ID.<n_subjects>' (one score column per subject).

    For every input row and every subject whose cell is all digits, one
    output row ``subject, score, shift, length, verb`` is written.  Rows
    are skipped entirely when they are too short to hold the 'V1' and
    'ResponseID' cells or when 'V1' is not an N/S code; previously such
    rows either crashed the script or were written with the previous
    row's shift/length.  When no known verb is found the verb is written
    as 'NA' instead of silently reusing the previous row's verb.

    Raises:
        ValueError: if a required column is missing from the header.
        StopIteration: if the input file is empty (no header row).
    """
    # Plain text mode keeps the script runnable on both Python 2 and 3.
    with open(in_path) as f:
        hnps = csv.reader(f, delimiter=',')
        header = next(hnps)

        # Column positions never change, so look them up once up front
        # rather than once per cell.
        condition_col = header.index('V1')
        sentence_col = header.index('ResponseID')
        subject_cols = [
            (s, header.index('ID.' + str(s)))
            for s in range(1, n_subjects + 1)
        ]
        min_cells = max(condition_col, sentence_col) + 1

        # Open the output only after the header has been validated so a
        # malformed input does not clobber an existing output file.
        with open(out_path, 'w') as out:
            data = csv.writer(out, delimiter=',')
            data.writerow(['subject', 'score', 'shift', 'length', 'verb'])

            for line in hnps:
                if len(line) < min_cells:
                    # blank or truncated row
                    continue

                condition = _split_condition(line[condition_col])
                if condition is None:
                    continue
                shift, length = condition

                verb = _find_verb(line[sentence_col]) or 'NA'

                for s, col in subject_cols:
                    if col >= len(line):
                        # that cell is empty for this subject
                        continue
                    # if this subject responded to this item add data
                    # point to file
                    score = line[col]
                    if score.isdigit():
                        data.writerow([s, score, shift, length, verb])


if __name__ == '__main__':
    reshape()