# transform a raw data file into a happy proto data frame
#
import csv
# Verbs searched for in each item's sentence. Every sentence is expected to
# contain exactly one of them; if several matched, the first one in this
# list would be used.
verbs = [
    'mentioned',
    'indicated',
    'proposed',
    'suggested',
    'recommended',
    'confessed',
    'stated',
    'announced',
    'muttered',
    'explained',
]
out_name = 'reshaped.csv'


def _cell(row, index):
    """Return ``row[index]``, or None when the row is too short to have it."""
    return row[index] if index < len(row) else None


def _parse_condition(code):
    """Split a condition code such as ``'N12'`` into ``('N', '12')``.

    Returns None when the code is missing, empty, or does not start with
    'N' or 'S', so that callers can skip the row instead of reusing values
    left over from a previous row.
    """
    if code and code[0] in ('N', 'S'):
        return code[0], code[1:]
    return None


def _find_verb(sentence):
    """Return the first entry of ``verbs`` contained in *sentence*, else None."""
    for candidate in verbs:
        if candidate in sentence:
            return candidate
    return None


def reshape_rows(header, rows, n_subjects=192):
    """Yield one ``[subject, score, shift, length, verb]`` record per response.

    header     -- list of column names; must contain 'V1', 'ResponseID' and
                  'ID.1' .. 'ID.<n_subjects>' (ValueError otherwise).
    rows       -- iterable of rows (lists of strings) laid out like *header*.
    n_subjects -- highest subject number to look for (default 192).

    For each row, the 'V1' cell supplies the shift ('N' or 'S') and the
    length (the rest of the code), and the 'ResponseID' cell is searched for
    a verb. Rows whose code or verb cannot be determined are skipped. A
    record is produced for every subject column whose cell consists only of
    digits; missing or non-numeric cells are ignored.
    """
    # Resolve column positions once instead of once per cell.
    code_col = header.index('V1')
    sentence_col = header.index('ResponseID')
    subject_cols = [
        (subject, header.index('ID.' + str(subject)))
        for subject in range(1, n_subjects + 1)
    ]

    for row in rows:
        condition = _parse_condition(_cell(row, code_col))
        if condition is None:
            continue
        shift, length = condition

        verb = _find_verb(_cell(row, sentence_col) or '')
        if verb is None:
            continue

        for subject, col in subject_cols:
            score = _cell(row, col)
            # A short row or a blank cell means this subject gave no answer.
            if score is not None and score.isdigit():
                yield [subject, score, shift, length, verb]


def main(in_name='transposed.csv', out_name=out_name):
    """Read *in_name* and write the long-format data frame to *out_name*.

    The output always starts with the column-name row; an empty input file
    therefore yields an output containing only that row.
    """
    # newline='' lets the csv module handle line endings itself, and the
    # with-statement closes both files even if a row raises.
    with open(in_name, newline='') as src, \
            open(out_name, 'w', newline='') as dst:
        reader = csv.reader(src, delimiter=',')
        writer = csv.writer(dst, delimiter=',')
        writer.writerow(['subject', 'score', 'shift', 'length', 'verb'])
        try:
            header = next(reader)
        except StopIteration:
            # Empty input: nothing to reshape.
            return
        writer.writerows(reshape_rows(header, reader))


if __name__ == '__main__':
    main()
# Preprocess R data with python
python
data
(For archival purposes only; you probably don’t want to use this.)
This script can easily be adapted to code, recode, or otherwise modify CSV files before loading them into R.