Class cascading.DistanceUseCasePlatformTest

4

tests

0

failures

1m15.62s

duration

100%

successful

Tests

Test Duration Result
testEuclideanDistance 18.294s passed
testEuclideanDistanceComposite 19.173s passed
testEuclideanDistanceShort 18.264s passed
testPearsonDistanceComposite 19.890s passed

Standard output

2013-10-31 15:23:42,304 INFO  platform.PlatformRunner (PlatformRunner.java:getPlatformClassesFromClasspath(187)) - found 1 test platforms from classpath
2013-10-31 15:23:42,322 INFO  platform.PlatformRunner (PlatformRunner.java:addPlatform(237)) - installing platform: hadoop
2013-10-31 15:23:42,323 INFO  platform.PlatformRunner (PlatformRunner.java:addPlatform(238)) - running test: cascading.DistanceUseCasePlatformTest
2013-10-31 15:23:42,342 INFO  hadoop.HadoopPlatform (HadoopPlatform.java:setUp(112)) - not using cluster
2013-10-31 15:23:42,681 INFO  util.HadoopUtil (HadoopUtil.java:findMainClass(319)) - using default application jar, may cause class not found exceptions on the cluster
2013-10-31 15:23:42,682 INFO  planner.HadoopPlanner (HadoopPlanner.java:initialize(203)) - using application jar: /home/hadoop/.gradle/caches/artifacts-26/filestore/cascading/cascading-hadoop/2.2.0/jar/c7b52bd4c6da523ebde0a5d6eca2475123783db2/cascading-hadoop-2.2.0.jar
2013-10-31 15:23:42,693 INFO  property.AppProps (AppProps.java:getAppID(162)) - using app.id: DA2015CDEA6F4DF59CCA8D7592DD44F0
2013-10-31 15:23:42,897 INFO  util.Version (Version.java:printBanner(78)) - Concurrent, Inc - Cascading 2.2.0
2013-10-31 15:23:42,899 INFO  flow.Flow (BaseFlow.java:logInfo(1306)) - [euclidean] starting
2013-10-31 15:23:42,900 INFO  flow.Flow (BaseFlow.java:logInfo(1306)) - [euclidean]  source: Hfs["TextLine[['offset', 'line']->[ALL]]"]["/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/classes/test/data/critics.txt"]
2013-10-31 15:23:42,900 INFO  flow.Flow (BaseFlow.java:logInfo(1306)) - [euclidean]  sink: Hfs["TextLine[['line']->[ALL]]"]["/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/test/output/hadoop/distanceusecaseplatform/euclidean/composite"]
2013-10-31 15:23:42,900 INFO  flow.Flow (BaseFlow.java:logInfo(1306)) - [euclidean]  parallel execution is enabled: false
2013-10-31 15:23:42,900 INFO  flow.Flow (BaseFlow.java:logInfo(1306)) - [euclidean]  starting jobs: 3
2013-10-31 15:23:42,900 INFO  flow.Flow (BaseFlow.java:logInfo(1306)) - [euclidean]  allocating threads: 1
2013-10-31 15:23:42,901 INFO  flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [euclidean] starting step: (1/3)
2013-10-31 15:23:42,928 WARN  util.NativeCodeLoader (NativeCodeLoader.java:<clinit>(52)) - Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
2013-10-31 15:23:42,933 INFO  mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(825)) - Default number of map tasks: 1
2013-10-31 15:23:42,933 INFO  mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(832)) - Default number of reduce tasks: 1
2013-10-31 15:23:42,957 INFO  security.ShellBasedUnixGroupsMapping (ShellBasedUnixGroupsMapping.java:getUnixGroups(89)) - add hadoop to shell userGroupsCache
2013-10-31 15:23:42,964 INFO  mapred.JobClient (JobClient.java:copyAndConfigureFiles(794)) - Setting group to hadoop
2013-10-31 15:23:42,980 WARN  snappy.LoadSnappy (LoadSnappy.java:<clinit>(46)) - Snappy native library not loaded
2013-10-31 15:23:42,984 INFO  mapred.FileInputFormat (FileInputFormat.java:listStatus(199)) - Total input paths to process : 1
2013-10-31 15:23:43,191 INFO  flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [euclidean] submitted hadoop job: job_local_0001
2013-10-31 15:23:43,222 INFO  mapred.MapTask (MapTask.java:run(352)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:23:43,230 INFO  util.ProcessTree (ProcessTree.java:isSetsidSupported(63)) - setsid exited with exit code 0
2013-10-31 15:23:43,233 INFO  mapred.Task (Task.java:initialize(534)) -  Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@25e1e5e3
2013-10-31 15:23:43,238 INFO  io.MultiInputSplit (MultiInputSplit.java:readFields(161)) - current split input path: file:/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/classes/test/data/critics.txt
2013-10-31 15:23:43,243 INFO  mapred.MapTask (MapTask.java:runOldMapper(430)) - numReduceTasks: 1
2013-10-31 15:23:43,249 INFO  mapred.MapTask (MapTask.java:<init>(960)) - io.sort.mb = 100
2013-10-31 15:23:43,291 INFO  mapred.MapTask (MapTask.java:<init>(972)) - data buffer = 79691776/99614720
2013-10-31 15:23:43,291 INFO  mapred.MapTask (MapTask.java:<init>(973)) - record buffer = 262144/327680
2013-10-31 15:23:43,311 INFO  hadoop.FlowMapper (FlowMapper.java:configure(70)) - cascading version: 2.2.0
2013-10-31 15:23:43,311 INFO  hadoop.FlowMapper (FlowMapper.java:configure(71)) - child jvm opts: 
2013-10-31 15:23:43,377 INFO  hadoop.FlowMapper (FlowMapper.java:configure(86)) - sourcing from: Hfs["TextLine[['offset', 'line']->[ALL]]"]["/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/classes/test/data/critics.txt"]
2013-10-31 15:23:43,377 INFO  hadoop.FlowMapper (FlowMapper.java:configure(89)) - sinking to: CoGroup(euclidean)[by:[{1}:'l'][numSelfJoins:1]]
2013-10-31 15:23:43,393 INFO  mapred.MapTask (MapTask.java:flush(1300)) - Starting flush of map output
2013-10-31 15:23:43,403 INFO  mapred.MapTask (MapTask.java:sortAndSpill(1484)) - Finished spill 0
2013-10-31 15:23:43,405 INFO  mapred.Task (Task.java:done(852)) - Task:attempt_local_0001_m_000000_0 is done. And is in the process of commiting
2013-10-31 15:23:46,226 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - file:/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/classes/test/data/critics.txt:0+828
2013-10-31 15:23:46,227 INFO  mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0001_m_000000_0' done.
2013-10-31 15:23:46,242 INFO  mapred.ReduceTask (ReduceTask.java:run(360)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:23:46,244 INFO  mapred.Task (Task.java:initialize(534)) -  Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@6f28deae
2013-10-31 15:23:46,245 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - 
2013-10-31 15:23:46,255 INFO  mapred.Merger (Merger.java:merge(390)) - Merging 1 sorted segments
2013-10-31 15:23:46,257 INFO  mapred.Merger (Merger.java:merge(473)) - Down to the last merge-pass, with 1 segments left of total size: 1818 bytes
2013-10-31 15:23:46,257 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - 
2013-10-31 15:23:46,258 INFO  hadoop.FlowReducer (FlowReducer.java:configure(77)) - cascading version: 2.2.0
2013-10-31 15:23:46,259 INFO  hadoop.FlowReducer (FlowReducer.java:configure(78)) - child jvm opts: 
2013-10-31 15:23:46,276 INFO  hadoop.FlowReducer (FlowReducer.java:configure(96)) - sourcing from: CoGroup(euclidean)[by:[{1}:'l'][numSelfJoins:1]]
2013-10-31 15:23:46,276 INFO  hadoop.FlowReducer (FlowReducer.java:configure(99)) - sinking to: TempHfs["SequenceFile[['l', 'n1', 'v1', 'n2', 'v2']]"][3839691910/euclidean/]
2013-10-31 15:23:46,296 INFO  collect.SpillableTupleList (SpillableTupleList.java:getCodecClass(105)) - attempting to load codec: org.apache.hadoop.io.compress.GzipCodec
2013-10-31 15:23:46,296 INFO  collect.SpillableTupleList (SpillableTupleList.java:getCodecClass(110)) - found codec: org.apache.hadoop.io.compress.GzipCodec
2013-10-31 15:23:46,339 INFO  mapred.Task (Task.java:done(852)) - Task:attempt_local_0001_r_000000_0 is done. And is in the process of commiting
2013-10-31 15:23:46,340 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - 
2013-10-31 15:23:46,341 INFO  mapred.Task (Task.java:commit(1005)) - Task attempt_local_0001_r_000000_0 is allowed to commit now
2013-10-31 15:23:46,343 INFO  mapred.FileOutputCommitter (FileOutputCommitter.java:commitTask(138)) - Saved output of task 'attempt_local_0001_r_000000_0' to file:/tmp/hadoop-hadoop/3839691910_euclidean_C0846ABE38034977BE9B391FA214A42A
2013-10-31 15:23:49,244 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - reduce > reduce
2013-10-31 15:23:49,246 INFO  mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0001_r_000000_0' done.
2013-10-31 15:23:49,260 INFO  flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [euclidean] starting step: (2/3)
2013-10-31 15:23:49,264 INFO  mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(825)) - Default number of map tasks: 1
2013-10-31 15:23:49,265 INFO  mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(832)) - Default number of reduce tasks: 1
2013-10-31 15:23:49,278 INFO  mapred.JobClient (JobClient.java:copyAndConfigureFiles(794)) - Setting group to hadoop
2013-10-31 15:23:49,281 INFO  mapred.FileInputFormat (FileInputFormat.java:listStatus(199)) - Total input paths to process : 1
2013-10-31 15:23:49,336 INFO  flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [euclidean] submitted hadoop job: job_local_0002
2013-10-31 15:23:49,339 INFO  mapred.MapTask (MapTask.java:run(352)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:23:49,340 INFO  mapred.Task (Task.java:initialize(534)) -  Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@59fc982f
2013-10-31 15:23:49,341 INFO  io.MultiInputSplit (MultiInputSplit.java:readFields(161)) - current split input path: file:/tmp/hadoop-hadoop/3839691910_euclidean_C0846ABE38034977BE9B391FA214A42A/part-00000
2013-10-31 15:23:49,346 INFO  mapred.MapTask (MapTask.java:runOldMapper(430)) - numReduceTasks: 1
2013-10-31 15:23:49,347 INFO  mapred.MapTask (MapTask.java:<init>(960)) - io.sort.mb = 100
2013-10-31 15:23:49,388 INFO  mapred.MapTask (MapTask.java:<init>(972)) - data buffer = 79691776/99614720
2013-10-31 15:23:49,388 INFO  mapred.MapTask (MapTask.java:<init>(973)) - record buffer = 262144/327680
2013-10-31 15:23:49,389 INFO  hadoop.FlowMapper (FlowMapper.java:configure(70)) - cascading version: 2.2.0
2013-10-31 15:23:49,390 INFO  hadoop.FlowMapper (FlowMapper.java:configure(71)) - child jvm opts: 
2013-10-31 15:23:49,400 INFO  hadoop.FlowMapper (FlowMapper.java:configure(86)) - sourcing from: TempHfs["SequenceFile[['l', 'n1', 'v1', 'n2', 'v2']]"][3839691910/euclidean/]
2013-10-31 15:23:49,400 INFO  hadoop.FlowMapper (FlowMapper.java:configure(89)) - sinking to: GroupBy(euclidean)[by:[{?}:ALL]]
2013-10-31 15:23:49,439 INFO  mapred.MapTask (MapTask.java:flush(1300)) - Starting flush of map output
2013-10-31 15:23:49,473 INFO  mapred.MapTask (MapTask.java:sortAndSpill(1484)) - Finished spill 0
2013-10-31 15:23:49,475 INFO  mapred.Task (Task.java:done(852)) - Task:attempt_local_0002_m_000000_0 is done. And is in the process of commiting
2013-10-31 15:23:52,341 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - file:/tmp/hadoop-hadoop/3839691910_euclidean_C0846ABE38034977BE9B391FA214A42A/part-00000:0+13816
2013-10-31 15:23:52,343 INFO  mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0002_m_000000_0' done.
2013-10-31 15:23:52,348 INFO  mapred.ReduceTask (ReduceTask.java:run(360)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:23:52,351 INFO  mapred.Task (Task.java:initialize(534)) -  Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@2cb10995
2013-10-31 15:23:52,351 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - 
2013-10-31 15:23:52,354 INFO  mapred.Merger (Merger.java:merge(390)) - Merging 1 sorted segments
2013-10-31 15:23:52,355 INFO  mapred.Merger (Merger.java:merge(473)) - Down to the last merge-pass, with 1 segments left of total size: 13454 bytes
2013-10-31 15:23:52,355 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - 
2013-10-31 15:23:52,357 INFO  hadoop.FlowReducer (FlowReducer.java:configure(77)) - cascading version: 2.2.0
2013-10-31 15:23:52,357 INFO  hadoop.FlowReducer (FlowReducer.java:configure(78)) - child jvm opts: 
2013-10-31 15:23:52,378 INFO  hadoop.FlowReducer (FlowReducer.java:configure(96)) - sourcing from: GroupBy(euclidean)[by:[{?}:ALL]]
2013-10-31 15:23:52,378 INFO  hadoop.FlowReducer (FlowReducer.java:configure(99)) - sinking to: TempHfs["SequenceFile[['l', 'n1', 'v1', 'n2', 'v2']]"][6500997619/euclidean/]
2013-10-31 15:23:52,401 INFO  mapred.Task (Task.java:done(852)) - Task:attempt_local_0002_r_000000_0 is done. And is in the process of commiting
2013-10-31 15:23:52,402 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - 
2013-10-31 15:23:52,403 INFO  mapred.Task (Task.java:commit(1005)) - Task attempt_local_0002_r_000000_0 is allowed to commit now
2013-10-31 15:23:52,404 INFO  mapred.FileOutputCommitter (FileOutputCommitter.java:commitTask(138)) - Saved output of task 'attempt_local_0002_r_000000_0' to file:/tmp/hadoop-hadoop/6500997619_euclidean_A94081EE9B6F445982476D4B3E68E0EE
2013-10-31 15:23:55,350 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - reduce > reduce
2013-10-31 15:23:55,352 INFO  mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0002_r_000000_0' done.
2013-10-31 15:23:55,361 INFO  flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [euclidean] starting step: (3/3) ...tform/euclidean/composite
2013-10-31 15:23:55,365 INFO  mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(825)) - Default number of map tasks: 1
2013-10-31 15:23:55,365 INFO  mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(832)) - Default number of reduce tasks: 1
2013-10-31 15:23:55,383 INFO  mapred.JobClient (JobClient.java:copyAndConfigureFiles(794)) - Setting group to hadoop
2013-10-31 15:23:55,389 INFO  mapred.FileInputFormat (FileInputFormat.java:listStatus(199)) - Total input paths to process : 1
2013-10-31 15:23:55,432 INFO  flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [euclidean] submitted hadoop job: job_local_0003
2013-10-31 15:23:55,434 INFO  mapred.MapTask (MapTask.java:run(352)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:23:55,436 INFO  mapred.Task (Task.java:initialize(534)) -  Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@3feb908d
2013-10-31 15:23:55,438 INFO  io.MultiInputSplit (MultiInputSplit.java:readFields(161)) - current split input path: file:/tmp/hadoop-hadoop/6500997619_euclidean_A94081EE9B6F445982476D4B3E68E0EE/part-00000
2013-10-31 15:23:55,441 INFO  mapred.MapTask (MapTask.java:runOldMapper(430)) - numReduceTasks: 1
2013-10-31 15:23:55,441 INFO  mapred.MapTask (MapTask.java:<init>(960)) - io.sort.mb = 100
2013-10-31 15:23:55,520 INFO  mapred.MapTask (MapTask.java:<init>(972)) - data buffer = 79691776/99614720
2013-10-31 15:23:55,520 INFO  mapred.MapTask (MapTask.java:<init>(973)) - record buffer = 262144/327680
2013-10-31 15:23:55,522 INFO  hadoop.FlowMapper (FlowMapper.java:configure(70)) - cascading version: 2.2.0
2013-10-31 15:23:55,522 INFO  hadoop.FlowMapper (FlowMapper.java:configure(71)) - child jvm opts: 
2013-10-31 15:23:55,538 INFO  hadoop.FlowMapper (FlowMapper.java:configure(86)) - sourcing from: TempHfs["SequenceFile[['l', 'n1', 'v1', 'n2', 'v2']]"][6500997619/euclidean/]
2013-10-31 15:23:55,538 INFO  hadoop.FlowMapper (FlowMapper.java:configure(89)) - sinking to: GroupBy(euclidean)[by:[{2}:'n1', 'n2']]
2013-10-31 15:23:55,552 INFO  mapred.MapTask (MapTask.java:flush(1300)) - Starting flush of map output
2013-10-31 15:23:55,560 INFO  mapred.MapTask (MapTask.java:sortAndSpill(1484)) - Finished spill 0
2013-10-31 15:23:55,561 INFO  mapred.Task (Task.java:done(852)) - Task:attempt_local_0003_m_000000_0 is done. And is in the process of commiting
2013-10-31 15:23:58,436 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - file:/tmp/hadoop-hadoop/6500997619_euclidean_A94081EE9B6F445982476D4B3E68E0EE/part-00000:0+6943
2013-10-31 15:23:58,438 INFO  mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0003_m_000000_0' done.
2013-10-31 15:23:58,443 INFO  mapred.ReduceTask (ReduceTask.java:run(360)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:23:58,445 INFO  mapred.Task (Task.java:initialize(534)) -  Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@95ac5d5
2013-10-31 15:23:58,445 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - 
2013-10-31 15:23:58,447 INFO  mapred.Merger (Merger.java:merge(390)) - Merging 1 sorted segments
2013-10-31 15:23:58,448 INFO  mapred.Merger (Merger.java:merge(473)) - Down to the last merge-pass, with 1 segments left of total size: 6467 bytes
2013-10-31 15:23:58,448 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - 
2013-10-31 15:23:58,450 INFO  hadoop.FlowReducer (FlowReducer.java:configure(77)) - cascading version: 2.2.0
2013-10-31 15:23:58,450 INFO  hadoop.FlowReducer (FlowReducer.java:configure(78)) - child jvm opts: 
2013-10-31 15:23:58,462 INFO  hadoop.FlowReducer (FlowReducer.java:configure(96)) - sourcing from: GroupBy(euclidean)[by:[{2}:'n1', 'n2']]
2013-10-31 15:23:58,462 INFO  hadoop.FlowReducer (FlowReducer.java:configure(99)) - sinking to: Hfs["TextLine[['line']->[ALL]]"]["/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/test/output/hadoop/distanceusecaseplatform/euclidean/composite"]
2013-10-31 15:23:58,474 INFO  mapred.Task (Task.java:done(852)) - Task:attempt_local_0003_r_000000_0 is done. And is in the process of commiting
2013-10-31 15:23:58,474 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - 
2013-10-31 15:23:58,475 INFO  mapred.Task (Task.java:commit(1005)) - Task attempt_local_0003_r_000000_0 is allowed to commit now
2013-10-31 15:23:58,477 INFO  mapred.FileOutputCommitter (FileOutputCommitter.java:commitTask(138)) - Saved output of task 'attempt_local_0003_r_000000_0' to file:/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/test/output/hadoop/distanceusecaseplatform/euclidean/composite
2013-10-31 15:24:01,445 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - reduce > reduce
2013-10-31 15:24:01,447 INFO  mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0003_r_000000_0' done.
2013-10-31 15:24:01,460 INFO  util.Hadoop18TapUtil (Hadoop18TapUtil.java:cleanTempPath(219)) - deleting temp path /home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/test/output/hadoop/distanceusecaseplatform/euclidean/composite/_temporary
2013-10-31 15:24:01,496 INFO  mapred.FileInputFormat (FileInputFormat.java:listStatus(199)) - Total input paths to process : 1
2013-10-31 15:24:01,511 INFO  mapred.FileInputFormat (FileInputFormat.java:listStatus(199)) - Total input paths to process : 1
2013-10-31 15:24:01,512 INFO  cascading.PlatformTestCase (PlatformTestCase.java:tearDown(209)) - copying to local /home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/test/output//hadoop/distanceusecaseplatform/euclidean/composite
2013-10-31 15:24:01,529 INFO  util.HadoopUtil (HadoopUtil.java:findMainClass(319)) - using default application jar, may cause class not found exceptions on the cluster
2013-10-31 15:24:01,529 INFO  planner.HadoopPlanner (HadoopPlanner.java:initialize(203)) - using application jar: /home/hadoop/.gradle/caches/artifacts-26/filestore/cascading/cascading-hadoop/2.2.0/jar/c7b52bd4c6da523ebde0a5d6eca2475123783db2/cascading-hadoop-2.2.0.jar
2013-10-31 15:24:01,580 INFO  flow.Flow (BaseFlow.java:logInfo(1306)) - [pearson] starting
2013-10-31 15:24:01,580 INFO  flow.Flow (BaseFlow.java:logInfo(1306)) - [pearson]  source: Hfs["TextLine[['offset', 'line']->[ALL]]"]["/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/classes/test/data/critics.txt"]
2013-10-31 15:24:01,580 INFO  flow.Flow (BaseFlow.java:logInfo(1306)) - [pearson]  sink: Hfs["TextLine[['line']->[ALL]]"]["/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/test/output/hadoop/distanceusecaseplatform/pearson/composite"]
2013-10-31 15:24:01,581 INFO  flow.Flow (BaseFlow.java:logInfo(1306)) - [pearson]  parallel execution is enabled: false
2013-10-31 15:24:01,581 INFO  flow.Flow (BaseFlow.java:logInfo(1306)) - [pearson]  starting jobs: 3
2013-10-31 15:24:01,581 INFO  flow.Flow (BaseFlow.java:logInfo(1306)) - [pearson]  allocating threads: 1
2013-10-31 15:24:01,581 INFO  flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [pearson] starting step: (1/3)
2013-10-31 15:24:01,583 INFO  mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(825)) - Default number of map tasks: 1
2013-10-31 15:24:01,583 INFO  mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(832)) - Default number of reduce tasks: 1
2013-10-31 15:24:01,590 INFO  mapred.JobClient (JobClient.java:copyAndConfigureFiles(794)) - Setting group to hadoop
2013-10-31 15:24:01,592 INFO  mapred.FileInputFormat (FileInputFormat.java:listStatus(199)) - Total input paths to process : 1
2013-10-31 15:24:01,624 INFO  flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [pearson] submitted hadoop job: job_local_0004
2013-10-31 15:24:01,626 INFO  mapred.MapTask (MapTask.java:run(352)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:24:01,627 INFO  mapred.Task (Task.java:initialize(534)) -  Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@509dcbbb
2013-10-31 15:24:01,628 INFO  io.MultiInputSplit (MultiInputSplit.java:readFields(161)) - current split input path: file:/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/classes/test/data/critics.txt
2013-10-31 15:24:01,629 INFO  mapred.MapTask (MapTask.java:runOldMapper(430)) - numReduceTasks: 1
2013-10-31 15:24:01,629 INFO  mapred.MapTask (MapTask.java:<init>(960)) - io.sort.mb = 100
2013-10-31 15:24:01,648 INFO  mapred.MapTask (MapTask.java:<init>(972)) - data buffer = 79691776/99614720
2013-10-31 15:24:01,648 INFO  mapred.MapTask (MapTask.java:<init>(973)) - record buffer = 262144/327680
2013-10-31 15:24:01,649 INFO  hadoop.FlowMapper (FlowMapper.java:configure(70)) - cascading version: 2.2.0
2013-10-31 15:24:01,649 INFO  hadoop.FlowMapper (FlowMapper.java:configure(71)) - child jvm opts: 
2013-10-31 15:24:01,659 INFO  hadoop.FlowMapper (FlowMapper.java:configure(86)) - sourcing from: Hfs["TextLine[['offset', 'line']->[ALL]]"]["/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/classes/test/data/critics.txt"]
2013-10-31 15:24:01,659 INFO  hadoop.FlowMapper (FlowMapper.java:configure(89)) - sinking to: CoGroup(pearson)[by:[{1}:'l'][numSelfJoins:1]]
2013-10-31 15:24:01,663 INFO  mapred.MapTask (MapTask.java:flush(1300)) - Starting flush of map output
2013-10-31 15:24:01,665 INFO  mapred.MapTask (MapTask.java:sortAndSpill(1484)) - Finished spill 0
2013-10-31 15:24:01,667 INFO  mapred.Task (Task.java:done(852)) - Task:attempt_local_0004_m_000000_0 is done. And is in the process of commiting
2013-10-31 15:24:04,629 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - file:/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/classes/test/data/critics.txt:0+828
2013-10-31 15:24:04,630 INFO  mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0004_m_000000_0' done.
2013-10-31 15:24:04,635 INFO  mapred.ReduceTask (ReduceTask.java:run(360)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:24:04,637 INFO  mapred.Task (Task.java:initialize(534)) -  Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@2379fda4
2013-10-31 15:24:04,637 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - 
2013-10-31 15:24:04,639 INFO  mapred.Merger (Merger.java:merge(390)) - Merging 1 sorted segments
2013-10-31 15:24:04,640 INFO  mapred.Merger (Merger.java:merge(473)) - Down to the last merge-pass, with 1 segments left of total size: 1818 bytes
2013-10-31 15:24:04,640 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - 
2013-10-31 15:24:04,641 INFO  hadoop.FlowReducer (FlowReducer.java:configure(77)) - cascading version: 2.2.0
2013-10-31 15:24:04,642 INFO  hadoop.FlowReducer (FlowReducer.java:configure(78)) - child jvm opts: 
2013-10-31 15:24:04,659 INFO  hadoop.FlowReducer (FlowReducer.java:configure(96)) - sourcing from: CoGroup(pearson)[by:[{1}:'l'][numSelfJoins:1]]
2013-10-31 15:24:04,659 INFO  hadoop.FlowReducer (FlowReducer.java:configure(99)) - sinking to: TempHfs["SequenceFile[['l', 'n1', 'v1', 'n2', 'v2']]"][992065113/pearson/]
2013-10-31 15:24:04,662 INFO  collect.SpillableTupleList (SpillableTupleList.java:getCodecClass(105)) - attempting to load codec: org.apache.hadoop.io.compress.GzipCodec
2013-10-31 15:24:04,662 INFO  collect.SpillableTupleList (SpillableTupleList.java:getCodecClass(110)) - found codec: org.apache.hadoop.io.compress.GzipCodec
2013-10-31 15:24:04,685 INFO  mapred.Task (Task.java:done(852)) - Task:attempt_local_0004_r_000000_0 is done. And is in the process of commiting
2013-10-31 15:24:04,685 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - 
2013-10-31 15:24:04,686 INFO  mapred.Task (Task.java:commit(1005)) - Task attempt_local_0004_r_000000_0 is allowed to commit now
2013-10-31 15:24:04,687 INFO  mapred.FileOutputCommitter (FileOutputCommitter.java:commitTask(138)) - Saved output of task 'attempt_local_0004_r_000000_0' to file:/tmp/hadoop-hadoop/992065113_pearson_A68FB44274DB45FAA3ED835AA45AF484
2013-10-31 15:24:07,636 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - reduce > reduce
2013-10-31 15:24:07,638 INFO  mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0004_r_000000_0' done.
2013-10-31 15:24:07,645 INFO  flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [pearson] starting step: (2/3)
2013-10-31 15:24:07,650 INFO  mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(825)) - Default number of map tasks: 1
2013-10-31 15:24:07,651 INFO  mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(832)) - Default number of reduce tasks: 1
2013-10-31 15:24:07,667 INFO  mapred.JobClient (JobClient.java:copyAndConfigureFiles(794)) - Setting group to hadoop
2013-10-31 15:24:07,672 INFO  mapred.FileInputFormat (FileInputFormat.java:listStatus(199)) - Total input paths to process : 1
2013-10-31 15:24:07,700 INFO  flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [pearson] submitted hadoop job: job_local_0005
2013-10-31 15:24:07,702 INFO  mapred.MapTask (MapTask.java:run(352)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:24:07,703 INFO  mapred.Task (Task.java:initialize(534)) -  Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@4e2b5fab
2013-10-31 15:24:07,704 INFO  io.MultiInputSplit (MultiInputSplit.java:readFields(161)) - current split input path: file:/tmp/hadoop-hadoop/992065113_pearson_A68FB44274DB45FAA3ED835AA45AF484/part-00000
2013-10-31 15:24:07,705 INFO  mapred.MapTask (MapTask.java:runOldMapper(430)) - numReduceTasks: 1
2013-10-31 15:24:07,705 INFO  mapred.MapTask (MapTask.java:<init>(960)) - io.sort.mb = 100
2013-10-31 15:24:07,746 INFO  mapred.MapTask (MapTask.java:<init>(972)) - data buffer = 79691776/99614720
2013-10-31 15:24:07,747 INFO  mapred.MapTask (MapTask.java:<init>(973)) - record buffer = 262144/327680
2013-10-31 15:24:07,748 INFO  hadoop.FlowMapper (FlowMapper.java:configure(70)) - cascading version: 2.2.0
2013-10-31 15:24:07,748 INFO  hadoop.FlowMapper (FlowMapper.java:configure(71)) - child jvm opts: 
2013-10-31 15:24:07,755 INFO  hadoop.FlowMapper (FlowMapper.java:configure(86)) - sourcing from: TempHfs["SequenceFile[['l', 'n1', 'v1', 'n2', 'v2']]"][992065113/pearson/]
2013-10-31 15:24:07,756 INFO  hadoop.FlowMapper (FlowMapper.java:configure(89)) - sinking to: GroupBy(pearson)[by:[{?}:ALL]]
2013-10-31 15:24:07,766 INFO  mapred.MapTask (MapTask.java:flush(1300)) - Starting flush of map output
2013-10-31 15:24:07,777 INFO  mapred.MapTask (MapTask.java:sortAndSpill(1484)) - Finished spill 0
2013-10-31 15:24:07,778 INFO  mapred.Task (Task.java:done(852)) - Task:attempt_local_0005_m_000000_0 is done. And is in the process of commiting
2013-10-31 15:24:10,704 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - file:/tmp/hadoop-hadoop/992065113_pearson_A68FB44274DB45FAA3ED835AA45AF484/part-00000:0+13816
2013-10-31 15:24:10,706 INFO  mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0005_m_000000_0' done.
2013-10-31 15:24:10,710 INFO  mapred.ReduceTask (ReduceTask.java:run(360)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:24:10,712 INFO  mapred.Task (Task.java:initialize(534)) -  Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@c7d2c2b
2013-10-31 15:24:10,712 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - 
2013-10-31 15:24:10,714 INFO  mapred.Merger (Merger.java:merge(390)) - Merging 1 sorted segments
2013-10-31 15:24:10,715 INFO  mapred.Merger (Merger.java:merge(473)) - Down to the last merge-pass, with 1 segments left of total size: 13454 bytes
2013-10-31 15:24:10,715 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - 
2013-10-31 15:24:10,716 INFO  hadoop.FlowReducer (FlowReducer.java:configure(77)) - cascading version: 2.2.0
2013-10-31 15:24:10,716 INFO  hadoop.FlowReducer (FlowReducer.java:configure(78)) - child jvm opts: 
2013-10-31 15:24:10,723 INFO  hadoop.FlowReducer (FlowReducer.java:configure(96)) - sourcing from: GroupBy(pearson)[by:[{?}:ALL]]
2013-10-31 15:24:10,723 INFO  hadoop.FlowReducer (FlowReducer.java:configure(99)) - sinking to: TempHfs["SequenceFile[['l', 'n1', 'v1', 'n2', 'v2']]"][920026181/pearson/]
2013-10-31 15:24:10,737 INFO  mapred.Task (Task.java:done(852)) - Task:attempt_local_0005_r_000000_0 is done. And is in the process of commiting
2013-10-31 15:24:10,737 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - 
2013-10-31 15:24:10,738 INFO  mapred.Task (Task.java:commit(1005)) - Task attempt_local_0005_r_000000_0 is allowed to commit now
2013-10-31 15:24:10,739 INFO  mapred.FileOutputCommitter (FileOutputCommitter.java:commitTask(138)) - Saved output of task 'attempt_local_0005_r_000000_0' to file:/tmp/hadoop-hadoop/920026181_pearson_8CC62A8EF333437F8EE6D3DD00F5BFB1
2013-10-31 15:24:13,712 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - reduce > reduce
2013-10-31 15:24:13,713 INFO  mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0005_r_000000_0' done.
2013-10-31 15:24:15,290 INFO  flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [pearson] starting step: (3/3) ...latform/pearson/composite
2013-10-31 15:24:15,294 INFO  mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(825)) - Default number of map tasks: 1
2013-10-31 15:24:15,295 INFO  mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(832)) - Default number of reduce tasks: 1
2013-10-31 15:24:15,311 INFO  mapred.JobClient (JobClient.java:copyAndConfigureFiles(794)) - Setting group to hadoop
2013-10-31 15:24:15,314 INFO  mapred.FileInputFormat (FileInputFormat.java:listStatus(199)) - Total input paths to process : 1
2013-10-31 15:24:15,343 INFO  flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [pearson] submitted hadoop job: job_local_0006
2013-10-31 15:24:15,348 INFO  mapred.MapTask (MapTask.java:run(352)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:24:15,350 INFO  mapred.Task (Task.java:initialize(534)) -  Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@40bde86b
2013-10-31 15:24:15,352 INFO  io.MultiInputSplit (MultiInputSplit.java:readFields(161)) - current split input path: file:/tmp/hadoop-hadoop/920026181_pearson_8CC62A8EF333437F8EE6D3DD00F5BFB1/part-00000
2013-10-31 15:24:15,354 INFO  mapred.MapTask (MapTask.java:runOldMapper(430)) - numReduceTasks: 1
2013-10-31 15:24:15,354 INFO  mapred.MapTask (MapTask.java:<init>(960)) - io.sort.mb = 100
2013-10-31 15:24:15,396 INFO  mapred.MapTask (MapTask.java:<init>(972)) - data buffer = 79691776/99614720
2013-10-31 15:24:15,396 INFO  mapred.MapTask (MapTask.java:<init>(973)) - record buffer = 262144/327680
2013-10-31 15:24:15,397 INFO  hadoop.FlowMapper (FlowMapper.java:configure(70)) - cascading version: 2.2.0
2013-10-31 15:24:15,397 INFO  hadoop.FlowMapper (FlowMapper.java:configure(71)) - child jvm opts: 
2013-10-31 15:24:15,405 INFO  hadoop.FlowMapper (FlowMapper.java:configure(86)) - sourcing from: TempHfs["SequenceFile[['l', 'n1', 'v1', 'n2', 'v2']]"][920026181/pearson/]
2013-10-31 15:24:15,405 INFO  hadoop.FlowMapper (FlowMapper.java:configure(89)) - sinking to: GroupBy(pearson)[by:[{2}:'n1', 'n2']]
2013-10-31 15:24:15,410 INFO  mapred.MapTask (MapTask.java:flush(1300)) - Starting flush of map output
2013-10-31 15:24:15,413 INFO  mapred.MapTask (MapTask.java:sortAndSpill(1484)) - Finished spill 0
2013-10-31 15:24:15,415 INFO  mapred.Task (Task.java:done(852)) - Task:attempt_local_0006_m_000000_0 is done. And is in the process of commiting
2013-10-31 15:24:18,351 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - file:/tmp/hadoop-hadoop/920026181_pearson_8CC62A8EF333437F8EE6D3DD00F5BFB1/part-00000:0+6943
2013-10-31 15:24:18,352 INFO  mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0006_m_000000_0' done.
2013-10-31 15:24:18,357 INFO  mapred.ReduceTask (ReduceTask.java:run(360)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:24:18,358 INFO  mapred.Task (Task.java:initialize(534)) -  Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@5ea7f1b1
2013-10-31 15:24:18,359 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - 
2013-10-31 15:24:18,361 INFO  mapred.Merger (Merger.java:merge(390)) - Merging 1 sorted segments
2013-10-31 15:24:18,361 INFO  mapred.Merger (Merger.java:merge(473)) - Down to the last merge-pass, with 1 segments left of total size: 6467 bytes
2013-10-31 15:24:18,362 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - 
2013-10-31 15:24:18,363 INFO  hadoop.FlowReducer (FlowReducer.java:configure(77)) - cascading version: 2.2.0
2013-10-31 15:24:18,363 INFO  hadoop.FlowReducer (FlowReducer.java:configure(78)) - child jvm opts: 
2013-10-31 15:24:18,377 INFO  hadoop.FlowReducer (FlowReducer.java:configure(96)) - sourcing from: GroupBy(pearson)[by:[{2}:'n1', 'n2']]
2013-10-31 15:24:18,378 INFO  hadoop.FlowReducer (FlowReducer.java:configure(99)) - sinking to: Hfs["TextLine[['line']->[ALL]]"]["/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/test/output/hadoop/distanceusecaseplatform/pearson/composite"]
2013-10-31 15:24:18,483 INFO  mapred.Task (Task.java:done(852)) - Task:attempt_local_0006_r_000000_0 is done. And is in the process of commiting
2013-10-31 15:24:18,484 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - 
2013-10-31 15:24:18,486 INFO  mapred.Task (Task.java:commit(1005)) - Task attempt_local_0006_r_000000_0 is allowed to commit now
2013-10-31 15:24:18,523 INFO  mapred.FileOutputCommitter (FileOutputCommitter.java:commitTask(138)) - Saved output of task 'attempt_local_0006_r_000000_0' to file:/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/test/output/hadoop/distanceusecaseplatform/pearson/composite
2013-10-31 15:24:21,358 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - reduce > reduce
2013-10-31 15:24:21,360 INFO  mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0006_r_000000_0' done.
2013-10-31 15:24:21,368 INFO  util.Hadoop18TapUtil (Hadoop18TapUtil.java:cleanTempPath(219)) - deleting temp path /home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/test/output/hadoop/distanceusecaseplatform/pearson/composite/_temporary
2013-10-31 15:24:21,391 INFO  mapred.FileInputFormat (FileInputFormat.java:listStatus(199)) - Total input paths to process : 1
2013-10-31 15:24:21,403 INFO  mapred.FileInputFormat (FileInputFormat.java:listStatus(199)) - Total input paths to process : 1
2013-10-31 15:24:21,404 INFO  cascading.PlatformTestCase (PlatformTestCase.java:tearDown(209)) - copying to local /home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/test/output//hadoop/distanceusecaseplatform/pearson/composite
2013-10-31 15:24:21,415 INFO  util.HadoopUtil (HadoopUtil.java:findMainClass(319)) - using default application jar, may cause class not found exceptions on the cluster
2013-10-31 15:24:21,415 INFO  planner.HadoopPlanner (HadoopPlanner.java:initialize(203)) - using application jar: /home/hadoop/.gradle/caches/artifacts-26/filestore/cascading/cascading-hadoop/2.2.0/jar/c7b52bd4c6da523ebde0a5d6eca2475123783db2/cascading-hadoop-2.2.0.jar
2013-10-31 15:24:21,462 INFO  flow.Flow (BaseFlow.java:logInfo(1306)) - [euclidean] starting
2013-10-31 15:24:21,463 INFO  flow.Flow (BaseFlow.java:logInfo(1306)) - [euclidean]  source: Hfs["TextLine[['offset', 'line']->[ALL]]"]["/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/classes/test/data/critics.txt"]
2013-10-31 15:24:21,463 INFO  flow.Flow (BaseFlow.java:logInfo(1306)) - [euclidean]  sink: Hfs["TextLine[['line']->[ALL]]"]["/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/test/output/hadoop/distanceusecaseplatform/euclidean/long"]
2013-10-31 15:24:21,463 INFO  flow.Flow (BaseFlow.java:logInfo(1306)) - [euclidean]  parallel execution is enabled: false
2013-10-31 15:24:21,463 INFO  flow.Flow (BaseFlow.java:logInfo(1306)) - [euclidean]  starting jobs: 3
2013-10-31 15:24:21,463 INFO  flow.Flow (BaseFlow.java:logInfo(1306)) - [euclidean]  allocating threads: 1
2013-10-31 15:24:21,464 INFO  flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [euclidean] starting step: (1/3)
2013-10-31 15:24:21,468 INFO  mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(825)) - Default number of map tasks: 1
2013-10-31 15:24:21,469 INFO  mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(832)) - Default number of reduce tasks: 1
2013-10-31 15:24:21,478 INFO  mapred.JobClient (JobClient.java:copyAndConfigureFiles(794)) - Setting group to hadoop
2013-10-31 15:24:21,480 INFO  mapred.FileInputFormat (FileInputFormat.java:listStatus(199)) - Total input paths to process : 1
2013-10-31 15:24:21,503 INFO  flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [euclidean] submitted hadoop job: job_local_0007
2013-10-31 15:24:21,507 INFO  mapred.MapTask (MapTask.java:run(352)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:24:21,509 INFO  mapred.Task (Task.java:initialize(534)) -  Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@3f025aba
2013-10-31 15:24:21,511 INFO  io.MultiInputSplit (MultiInputSplit.java:readFields(161)) - current split input path: file:/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/classes/test/data/critics.txt
2013-10-31 15:24:21,513 INFO  mapred.MapTask (MapTask.java:runOldMapper(430)) - numReduceTasks: 1
2013-10-31 15:24:21,513 INFO  mapred.MapTask (MapTask.java:<init>(960)) - io.sort.mb = 100
2013-10-31 15:24:21,547 INFO  mapred.MapTask (MapTask.java:<init>(972)) - data buffer = 79691776/99614720
2013-10-31 15:24:21,548 INFO  mapred.MapTask (MapTask.java:<init>(973)) - record buffer = 262144/327680
2013-10-31 15:24:21,554 INFO  hadoop.FlowMapper (FlowMapper.java:configure(70)) - cascading version: 2.2.0
2013-10-31 15:24:21,554 INFO  hadoop.FlowMapper (FlowMapper.java:configure(71)) - child jvm opts: 
2013-10-31 15:24:21,562 INFO  hadoop.FlowMapper (FlowMapper.java:configure(86)) - sourcing from: Hfs["TextLine[['offset', 'line']->[ALL]]"]["/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/classes/test/data/critics.txt"]
2013-10-31 15:24:21,562 INFO  hadoop.FlowMapper (FlowMapper.java:configure(89)) - sinking to: CoGroup(euclidean)[by:[{1}:'movie'][numSelfJoins:1]]
2013-10-31 15:24:21,565 INFO  mapred.MapTask (MapTask.java:flush(1300)) - Starting flush of map output
2013-10-31 15:24:21,566 INFO  mapred.MapTask (MapTask.java:sortAndSpill(1484)) - Finished spill 0
2013-10-31 15:24:21,568 INFO  mapred.Task (Task.java:done(852)) - Task:attempt_local_0007_m_000000_0 is done. And is in the process of commiting
2013-10-31 15:24:24,510 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - file:/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/classes/test/data/critics.txt:0+828
2013-10-31 15:24:24,511 INFO  mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0007_m_000000_0' done.
2013-10-31 15:24:24,516 INFO  mapred.ReduceTask (ReduceTask.java:run(360)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:24:24,517 INFO  mapred.Task (Task.java:initialize(534)) -  Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@59b4ba80
2013-10-31 15:24:24,518 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - 
2013-10-31 15:24:24,520 INFO  mapred.Merger (Merger.java:merge(390)) - Merging 1 sorted segments
2013-10-31 15:24:24,520 INFO  mapred.Merger (Merger.java:merge(473)) - Down to the last merge-pass, with 1 segments left of total size: 1818 bytes
2013-10-31 15:24:24,521 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - 
2013-10-31 15:24:24,522 INFO  hadoop.FlowReducer (FlowReducer.java:configure(77)) - cascading version: 2.2.0
2013-10-31 15:24:24,522 INFO  hadoop.FlowReducer (FlowReducer.java:configure(78)) - child jvm opts: 
2013-10-31 15:24:24,536 INFO  hadoop.FlowReducer (FlowReducer.java:configure(96)) - sourcing from: CoGroup(euclidean)[by:[{1}:'movie'][numSelfJoins:1]]
2013-10-31 15:24:24,536 INFO  hadoop.FlowReducer (FlowReducer.java:configure(99)) - sinking to: TempHfs["SequenceFile[['movie', 'name1', 'rate1', 'name2', 'rate2']]"][1918939469/euclidean/]
2013-10-31 15:24:24,539 INFO  collect.SpillableTupleList (SpillableTupleList.java:getCodecClass(105)) - attempting to load codec: org.apache.hadoop.io.compress.GzipCodec
2013-10-31 15:24:24,539 INFO  collect.SpillableTupleList (SpillableTupleList.java:getCodecClass(110)) - found codec: org.apache.hadoop.io.compress.GzipCodec
2013-10-31 15:24:24,556 INFO  mapred.Task (Task.java:done(852)) - Task:attempt_local_0007_r_000000_0 is done. And is in the process of commiting
2013-10-31 15:24:24,556 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - 
2013-10-31 15:24:24,557 INFO  mapred.Task (Task.java:commit(1005)) - Task attempt_local_0007_r_000000_0 is allowed to commit now
2013-10-31 15:24:24,558 INFO  mapred.FileOutputCommitter (FileOutputCommitter.java:commitTask(138)) - Saved output of task 'attempt_local_0007_r_000000_0' to file:/tmp/hadoop-hadoop/1918939469_euclidean_525A16B2C8C44B0D8960271BC5547C64
2013-10-31 15:24:27,517 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - reduce > reduce
2013-10-31 15:24:27,519 INFO  mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0007_r_000000_0' done.
2013-10-31 15:24:27,529 INFO  flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [euclidean] starting step: (2/3)
2013-10-31 15:24:27,533 INFO  mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(825)) - Default number of map tasks: 1
2013-10-31 15:24:27,534 INFO  mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(832)) - Default number of reduce tasks: 1
2013-10-31 15:24:27,547 INFO  mapred.JobClient (JobClient.java:copyAndConfigureFiles(794)) - Setting group to hadoop
2013-10-31 15:24:27,551 INFO  mapred.FileInputFormat (FileInputFormat.java:listStatus(199)) - Total input paths to process : 1
2013-10-31 15:24:27,575 INFO  flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [euclidean] submitted hadoop job: job_local_0008
2013-10-31 15:24:27,579 INFO  mapred.MapTask (MapTask.java:run(352)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:24:27,581 INFO  mapred.Task (Task.java:initialize(534)) -  Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@31e4c806
2013-10-31 15:24:27,583 INFO  io.MultiInputSplit (MultiInputSplit.java:readFields(161)) - current split input path: file:/tmp/hadoop-hadoop/1918939469_euclidean_525A16B2C8C44B0D8960271BC5547C64/part-00000
2013-10-31 15:24:27,585 INFO  mapred.MapTask (MapTask.java:runOldMapper(430)) - numReduceTasks: 1
2013-10-31 15:24:27,586 INFO  mapred.MapTask (MapTask.java:<init>(960)) - io.sort.mb = 100
2013-10-31 15:24:27,611 INFO  mapred.MapTask (MapTask.java:<init>(972)) - data buffer = 79691776/99614720
2013-10-31 15:24:27,613 INFO  mapred.MapTask (MapTask.java:<init>(973)) - record buffer = 262144/327680
2013-10-31 15:24:27,622 INFO  hadoop.FlowMapper (FlowMapper.java:configure(70)) - cascading version: 2.2.0
2013-10-31 15:24:27,622 INFO  hadoop.FlowMapper (FlowMapper.java:configure(71)) - child jvm opts: 
2013-10-31 15:24:27,628 INFO  hadoop.FlowMapper (FlowMapper.java:configure(86)) - sourcing from: TempHfs["SequenceFile[['movie', 'name1', 'rate1', 'name2', 'rate2']]"][1918939469/euclidean/]
2013-10-31 15:24:27,628 INFO  hadoop.FlowMapper (FlowMapper.java:configure(89)) - sinking to: GroupBy(euclidean)[by:[{?}:ALL]]
2013-10-31 15:24:27,635 INFO  mapred.MapTask (MapTask.java:flush(1300)) - Starting flush of map output
2013-10-31 15:24:27,643 INFO  mapred.MapTask (MapTask.java:sortAndSpill(1484)) - Finished spill 0
2013-10-31 15:24:27,643 INFO  mapred.Task (Task.java:done(852)) - Task:attempt_local_0008_m_000000_0 is done. And is in the process of commiting
2013-10-31 15:24:30,581 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - file:/tmp/hadoop-hadoop/1918939469_euclidean_525A16B2C8C44B0D8960271BC5547C64/part-00000:0+13816
2013-10-31 15:24:30,582 INFO  mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0008_m_000000_0' done.
2013-10-31 15:24:30,584 INFO  mapred.ReduceTask (ReduceTask.java:run(360)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:24:30,584 INFO  mapred.Task (Task.java:initialize(534)) -  Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@66dab355
2013-10-31 15:24:30,584 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - 
2013-10-31 15:24:30,585 INFO  mapred.Merger (Merger.java:merge(390)) - Merging 1 sorted segments
2013-10-31 15:24:30,585 INFO  mapred.Merger (Merger.java:merge(473)) - Down to the last merge-pass, with 1 segments left of total size: 13454 bytes
2013-10-31 15:24:30,585 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - 
2013-10-31 15:24:30,586 INFO  hadoop.FlowReducer (FlowReducer.java:configure(77)) - cascading version: 2.2.0
2013-10-31 15:24:30,586 INFO  hadoop.FlowReducer (FlowReducer.java:configure(78)) - child jvm opts: 
2013-10-31 15:24:30,592 INFO  hadoop.FlowReducer (FlowReducer.java:configure(96)) - sourcing from: GroupBy(euclidean)[by:[{?}:ALL]]
2013-10-31 15:24:30,592 INFO  hadoop.FlowReducer (FlowReducer.java:configure(99)) - sinking to: TempHfs["SequenceFile[['movie', 'name1', 'rate1', 'name2', 'rate2', 'score']]"][1388883191/euclidean/]
2013-10-31 15:24:30,607 INFO  mapred.Task (Task.java:done(852)) - Task:attempt_local_0008_r_000000_0 is done. And is in the process of commiting
2013-10-31 15:24:30,607 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - 
2013-10-31 15:24:30,608 INFO  mapred.Task (Task.java:commit(1005)) - Task attempt_local_0008_r_000000_0 is allowed to commit now
2013-10-31 15:24:30,609 INFO  mapred.FileOutputCommitter (FileOutputCommitter.java:commitTask(138)) - Saved output of task 'attempt_local_0008_r_000000_0' to file:/tmp/hadoop-hadoop/1388883191_euclidean_442F50A935FB450BA740526685D6A7FE
2013-10-31 15:24:33,585 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - reduce > reduce
2013-10-31 15:24:33,586 INFO  mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0008_r_000000_0' done.
2013-10-31 15:24:33,593 INFO  flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [euclidean] starting step: (3/3) ...seplatform/euclidean/long
2013-10-31 15:24:33,597 INFO  mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(825)) - Default number of map tasks: 1
2013-10-31 15:24:33,597 INFO  mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(832)) - Default number of reduce tasks: 1
2013-10-31 15:24:33,610 INFO  mapred.JobClient (JobClient.java:copyAndConfigureFiles(794)) - Setting group to hadoop
2013-10-31 15:24:33,614 INFO  mapred.FileInputFormat (FileInputFormat.java:listStatus(199)) - Total input paths to process : 1
2013-10-31 15:24:33,632 INFO  flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [euclidean] submitted hadoop job: job_local_0009
2013-10-31 15:24:33,636 INFO  mapred.MapTask (MapTask.java:run(352)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:24:33,637 INFO  mapred.Task (Task.java:initialize(534)) -  Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@3f9b4e74
2013-10-31 15:24:33,638 INFO  io.MultiInputSplit (MultiInputSplit.java:readFields(161)) - current split input path: file:/tmp/hadoop-hadoop/1388883191_euclidean_442F50A935FB450BA740526685D6A7FE/part-00000
2013-10-31 15:24:33,639 INFO  mapred.MapTask (MapTask.java:runOldMapper(430)) - numReduceTasks: 1
2013-10-31 15:24:33,640 INFO  mapred.MapTask (MapTask.java:<init>(960)) - io.sort.mb = 100
2013-10-31 15:24:33,669 INFO  mapred.MapTask (MapTask.java:<init>(972)) - data buffer = 79691776/99614720
2013-10-31 15:24:33,670 INFO  mapred.MapTask (MapTask.java:<init>(973)) - record buffer = 262144/327680
2013-10-31 15:24:33,671 INFO  hadoop.FlowMapper (FlowMapper.java:configure(70)) - cascading version: 2.2.0
2013-10-31 15:24:33,671 INFO  hadoop.FlowMapper (FlowMapper.java:configure(71)) - child jvm opts: 
2013-10-31 15:24:33,679 INFO  hadoop.FlowMapper (FlowMapper.java:configure(86)) - sourcing from: TempHfs["SequenceFile[['movie', 'name1', 'rate1', 'name2', 'rate2', 'score']]"][1388883191/euclidean/]
2013-10-31 15:24:33,679 INFO  hadoop.FlowMapper (FlowMapper.java:configure(89)) - sinking to: GroupBy(euclidean)[by:[{2}:'name1', 'name2']]
2013-10-31 15:24:33,688 INFO  mapred.MapTask (MapTask.java:flush(1300)) - Starting flush of map output
2013-10-31 15:24:33,692 INFO  mapred.MapTask (MapTask.java:sortAndSpill(1484)) - Finished spill 0
2013-10-31 15:24:33,694 INFO  mapred.Task (Task.java:done(852)) - Task:attempt_local_0009_m_000000_0 is done. And is in the process of commiting
2013-10-31 15:24:36,638 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - file:/tmp/hadoop-hadoop/1388883191_euclidean_442F50A935FB450BA740526685D6A7FE/part-00000:0+7726
2013-10-31 15:24:36,639 INFO  mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0009_m_000000_0' done.
2013-10-31 15:24:36,644 INFO  mapred.ReduceTask (ReduceTask.java:run(360)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:24:36,646 INFO  mapred.Task (Task.java:initialize(534)) -  Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@82c0480
2013-10-31 15:24:36,646 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - 
2013-10-31 15:24:36,648 INFO  mapred.Merger (Merger.java:merge(390)) - Merging 1 sorted segments
2013-10-31 15:24:36,649 INFO  mapred.Merger (Merger.java:merge(473)) - Down to the last merge-pass, with 1 segments left of total size: 7250 bytes
2013-10-31 15:24:36,649 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - 
2013-10-31 15:24:36,650 INFO  hadoop.FlowReducer (FlowReducer.java:configure(77)) - cascading version: 2.2.0
2013-10-31 15:24:36,651 INFO  hadoop.FlowReducer (FlowReducer.java:configure(78)) - child jvm opts: 
2013-10-31 15:24:36,660 INFO  hadoop.FlowReducer (FlowReducer.java:configure(96)) - sourcing from: GroupBy(euclidean)[by:[{2}:'name1', 'name2']]
2013-10-31 15:24:36,661 INFO  hadoop.FlowReducer (FlowReducer.java:configure(99)) - sinking to: Hfs["TextLine[['line']->[ALL]]"]["/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/test/output/hadoop/distanceusecaseplatform/euclidean/long"]
2013-10-31 15:24:36,670 INFO  mapred.Task (Task.java:done(852)) - Task:attempt_local_0009_r_000000_0 is done. And is in the process of commiting
2013-10-31 15:24:36,670 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - 
2013-10-31 15:24:36,671 INFO  mapred.Task (Task.java:commit(1005)) - Task attempt_local_0009_r_000000_0 is allowed to commit now
2013-10-31 15:24:36,672 INFO  mapred.FileOutputCommitter (FileOutputCommitter.java:commitTask(138)) - Saved output of task 'attempt_local_0009_r_000000_0' to file:/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/test/output/hadoop/distanceusecaseplatform/euclidean/long
2013-10-31 15:24:39,646 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - reduce > reduce
2013-10-31 15:24:39,648 INFO  mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0009_r_000000_0' done.
2013-10-31 15:24:39,661 INFO  util.Hadoop18TapUtil (Hadoop18TapUtil.java:cleanTempPath(219)) - deleting temp path /home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/test/output/hadoop/distanceusecaseplatform/euclidean/long/_temporary
2013-10-31 15:24:39,681 INFO  mapred.FileInputFormat (FileInputFormat.java:listStatus(199)) - Total input paths to process : 1
2013-10-31 15:24:39,698 INFO  mapred.FileInputFormat (FileInputFormat.java:listStatus(199)) - Total input paths to process : 1
2013-10-31 15:24:39,699 INFO  cascading.PlatformTestCase (PlatformTestCase.java:tearDown(209)) - copying to local /home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/test/output//hadoop/distanceusecaseplatform/euclidean/long
2013-10-31 15:24:39,708 INFO  util.HadoopUtil (HadoopUtil.java:findMainClass(319)) - using default application jar, may cause class not found exceptions on the cluster
2013-10-31 15:24:39,708 INFO  planner.HadoopPlanner (HadoopPlanner.java:initialize(203)) - using application jar: /home/hadoop/.gradle/caches/artifacts-26/filestore/cascading/cascading-hadoop/2.2.0/jar/c7b52bd4c6da523ebde0a5d6eca2475123783db2/cascading-hadoop-2.2.0.jar
2013-10-31 15:24:39,745 INFO  flow.Flow (BaseFlow.java:logInfo(1306)) - [euclidean] starting
2013-10-31 15:24:39,746 INFO  flow.Flow (BaseFlow.java:logInfo(1306)) - [euclidean]  source: Hfs["TextLine[['offset', 'line']->[ALL]]"]["/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/classes/test/data/critics.txt"]
2013-10-31 15:24:39,746 INFO  flow.Flow (BaseFlow.java:logInfo(1306)) - [euclidean]  sink: Hfs["TextLine[['line']->[ALL]]"]["/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/test/output/hadoop/distanceusecaseplatform/euclidean/short"]
2013-10-31 15:24:39,747 INFO  flow.Flow (BaseFlow.java:logInfo(1306)) - [euclidean]  parallel execution is enabled: false
2013-10-31 15:24:39,747 INFO  flow.Flow (BaseFlow.java:logInfo(1306)) - [euclidean]  starting jobs: 3
2013-10-31 15:24:39,747 INFO  flow.Flow (BaseFlow.java:logInfo(1306)) - [euclidean]  allocating threads: 1
2013-10-31 15:24:39,748 INFO  flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [euclidean] starting step: (1/3)
2013-10-31 15:24:39,752 INFO  mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(825)) - Default number of map tasks: 1
2013-10-31 15:24:39,753 INFO  mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(832)) - Default number of reduce tasks: 1
2013-10-31 15:24:39,766 INFO  mapred.JobClient (JobClient.java:copyAndConfigureFiles(794)) - Setting group to hadoop
2013-10-31 15:24:39,769 INFO  mapred.FileInputFormat (FileInputFormat.java:listStatus(199)) - Total input paths to process : 1
2013-10-31 15:24:39,790 INFO  flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [euclidean] submitted hadoop job: job_local_0010
2013-10-31 15:24:39,794 INFO  mapred.MapTask (MapTask.java:run(352)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:24:39,796 INFO  mapred.Task (Task.java:initialize(534)) -  Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@34fdf528
2013-10-31 15:24:39,798 INFO  io.MultiInputSplit (MultiInputSplit.java:readFields(161)) - current split input path: file:/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/classes/test/data/critics.txt
2013-10-31 15:24:39,800 INFO  mapred.MapTask (MapTask.java:runOldMapper(430)) - numReduceTasks: 1
2013-10-31 15:24:39,801 INFO  mapred.MapTask (MapTask.java:<init>(960)) - io.sort.mb = 100
2013-10-31 15:24:39,831 INFO  mapred.MapTask (MapTask.java:<init>(972)) - data buffer = 79691776/99614720
2013-10-31 15:24:39,832 INFO  mapred.MapTask (MapTask.java:<init>(973)) - record buffer = 262144/327680
2013-10-31 15:24:39,833 INFO  hadoop.FlowMapper (FlowMapper.java:configure(70)) - cascading version: 2.2.0
2013-10-31 15:24:39,833 INFO  hadoop.FlowMapper (FlowMapper.java:configure(71)) - child jvm opts: 
2013-10-31 15:24:39,840 INFO  hadoop.FlowMapper (FlowMapper.java:configure(86)) - sourcing from: Hfs["TextLine[['offset', 'line']->[ALL]]"]["/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/classes/test/data/critics.txt"]
2013-10-31 15:24:39,840 INFO  hadoop.FlowMapper (FlowMapper.java:configure(89)) - sinking to: CoGroup(euclidean)[by:[{1}:'movie'][numSelfJoins:1]]
2013-10-31 15:24:39,842 INFO  mapred.MapTask (MapTask.java:flush(1300)) - Starting flush of map output
2013-10-31 15:24:39,844 INFO  mapred.MapTask (MapTask.java:sortAndSpill(1484)) - Finished spill 0
2013-10-31 15:24:39,845 INFO  mapred.Task (Task.java:done(852)) - Task:attempt_local_0010_m_000000_0 is done. And is in the process of commiting
2013-10-31 15:24:42,797 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - file:/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/classes/test/data/critics.txt:0+828
2013-10-31 15:24:42,798 INFO  mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0010_m_000000_0' done.
2013-10-31 15:24:42,803 INFO  mapred.ReduceTask (ReduceTask.java:run(360)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:24:42,805 INFO  mapred.Task (Task.java:initialize(534)) -  Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@fde0a96
2013-10-31 15:24:42,805 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - 
2013-10-31 15:24:42,808 INFO  mapred.Merger (Merger.java:merge(390)) - Merging 1 sorted segments
2013-10-31 15:24:42,809 INFO  mapred.Merger (Merger.java:merge(473)) - Down to the last merge-pass, with 1 segments left of total size: 1818 bytes
2013-10-31 15:24:42,809 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - 
2013-10-31 15:24:42,810 INFO  hadoop.FlowReducer (FlowReducer.java:configure(77)) - cascading version: 2.2.0
2013-10-31 15:24:42,811 INFO  hadoop.FlowReducer (FlowReducer.java:configure(78)) - child jvm opts: 
2013-10-31 15:24:42,820 INFO  hadoop.FlowReducer (FlowReducer.java:configure(96)) - sourcing from: CoGroup(euclidean)[by:[{1}:'movie'][numSelfJoins:1]]
2013-10-31 15:24:42,820 INFO  hadoop.FlowReducer (FlowReducer.java:configure(99)) - sinking to: TempHfs["SequenceFile[['movie', 'name1', 'rate1', 'name2', 'rate2']]"][1676381248/euclidean/]
2013-10-31 15:24:42,823 INFO  collect.SpillableTupleList (SpillableTupleList.java:getCodecClass(105)) - attempting to load codec: org.apache.hadoop.io.compress.GzipCodec
2013-10-31 15:24:42,823 INFO  collect.SpillableTupleList (SpillableTupleList.java:getCodecClass(110)) - found codec: org.apache.hadoop.io.compress.GzipCodec
2013-10-31 15:24:42,840 INFO  mapred.Task (Task.java:done(852)) - Task:attempt_local_0010_r_000000_0 is done. And is in the process of commiting
2013-10-31 15:24:42,840 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - 
2013-10-31 15:24:42,841 INFO  mapred.Task (Task.java:commit(1005)) - Task attempt_local_0010_r_000000_0 is allowed to commit now
2013-10-31 15:24:42,843 INFO  mapred.FileOutputCommitter (FileOutputCommitter.java:commitTask(138)) - Saved output of task 'attempt_local_0010_r_000000_0' to file:/tmp/hadoop-hadoop/1676381248_euclidean_39667152B5F44457B3895155EA3524C9
2013-10-31 15:24:45,804 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - reduce > reduce
2013-10-31 15:24:45,806 INFO  mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0010_r_000000_0' done.
2013-10-31 15:24:45,812 INFO  flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [euclidean] starting step: (2/3)
2013-10-31 15:24:45,816 INFO  mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(825)) - Default number of map tasks: 1
2013-10-31 15:24:45,817 INFO  mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(832)) - Default number of reduce tasks: 1
2013-10-31 15:24:45,830 INFO  mapred.JobClient (JobClient.java:copyAndConfigureFiles(794)) - Setting group to hadoop
2013-10-31 15:24:45,834 INFO  mapred.FileInputFormat (FileInputFormat.java:listStatus(199)) - Total input paths to process : 1
2013-10-31 15:24:45,852 INFO  flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [euclidean] submitted hadoop job: job_local_0011
2013-10-31 15:24:45,854 INFO  mapred.MapTask (MapTask.java:run(352)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:24:45,855 INFO  mapred.Task (Task.java:initialize(534)) -  Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@9b64fac
2013-10-31 15:24:45,856 INFO  io.MultiInputSplit (MultiInputSplit.java:readFields(161)) - current split input path: file:/tmp/hadoop-hadoop/1676381248_euclidean_39667152B5F44457B3895155EA3524C9/part-00000
2013-10-31 15:24:45,856 INFO  mapred.MapTask (MapTask.java:runOldMapper(430)) - numReduceTasks: 1
2013-10-31 15:24:45,857 INFO  mapred.MapTask (MapTask.java:<init>(960)) - io.sort.mb = 100
2013-10-31 15:24:45,880 INFO  mapred.MapTask (MapTask.java:<init>(972)) - data buffer = 79691776/99614720
2013-10-31 15:24:45,880 INFO  mapred.MapTask (MapTask.java:<init>(973)) - record buffer = 262144/327680
2013-10-31 15:24:45,881 INFO  hadoop.FlowMapper (FlowMapper.java:configure(70)) - cascading version: 2.2.0
2013-10-31 15:24:45,881 INFO  hadoop.FlowMapper (FlowMapper.java:configure(71)) - child jvm opts: 
2013-10-31 15:24:45,887 INFO  hadoop.FlowMapper (FlowMapper.java:configure(86)) - sourcing from: TempHfs["SequenceFile[['movie', 'name1', 'rate1', 'name2', 'rate2']]"][1676381248/euclidean/]
2013-10-31 15:24:45,887 INFO  hadoop.FlowMapper (FlowMapper.java:configure(89)) - sinking to: GroupBy(euclidean)[by:[{?}:ALL]]
2013-10-31 15:24:45,893 INFO  mapred.MapTask (MapTask.java:flush(1300)) - Starting flush of map output
2013-10-31 15:24:45,898 INFO  mapred.MapTask (MapTask.java:sortAndSpill(1484)) - Finished spill 0
2013-10-31 15:24:45,899 INFO  mapred.Task (Task.java:done(852)) - Task:attempt_local_0011_m_000000_0 is done. And is in the process of commiting
2013-10-31 15:24:48,856 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - file:/tmp/hadoop-hadoop/1676381248_euclidean_39667152B5F44457B3895155EA3524C9/part-00000:0+13816
2013-10-31 15:24:48,857 INFO  mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0011_m_000000_0' done.
2013-10-31 15:24:48,861 INFO  mapred.ReduceTask (ReduceTask.java:run(360)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:24:48,863 INFO  mapred.Task (Task.java:initialize(534)) -  Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@13515ded
2013-10-31 15:24:48,863 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - 
2013-10-31 15:24:48,865 INFO  mapred.Merger (Merger.java:merge(390)) - Merging 1 sorted segments
2013-10-31 15:24:48,866 INFO  mapred.Merger (Merger.java:merge(473)) - Down to the last merge-pass, with 1 segments left of total size: 13454 bytes
2013-10-31 15:24:48,866 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - 
2013-10-31 15:24:48,867 INFO  hadoop.FlowReducer (FlowReducer.java:configure(77)) - cascading version: 2.2.0
2013-10-31 15:24:48,868 INFO  hadoop.FlowReducer (FlowReducer.java:configure(78)) - child jvm opts: 
2013-10-31 15:24:48,878 INFO  hadoop.FlowReducer (FlowReducer.java:configure(96)) - sourcing from: GroupBy(euclidean)[by:[{?}:ALL]]
2013-10-31 15:24:48,878 INFO  hadoop.FlowReducer (FlowReducer.java:configure(99)) - sinking to: TempHfs["SequenceFile[['movie', 'name1', 'rate1', 'name2', 'rate2', 'score']]"][6369659947/euclidean/]
2013-10-31 15:24:48,893 INFO  mapred.Task (Task.java:done(852)) - Task:attempt_local_0011_r_000000_0 is done. And is in the process of commiting
2013-10-31 15:24:48,894 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - 
2013-10-31 15:24:48,894 INFO  mapred.Task (Task.java:commit(1005)) - Task attempt_local_0011_r_000000_0 is allowed to commit now
2013-10-31 15:24:48,896 INFO  mapred.FileOutputCommitter (FileOutputCommitter.java:commitTask(138)) - Saved output of task 'attempt_local_0011_r_000000_0' to file:/tmp/hadoop-hadoop/6369659947_euclidean_D8605651C8F7468EABC0EE5A294460CC
2013-10-31 15:24:51,863 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - reduce > reduce
2013-10-31 15:24:51,864 INFO  mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0011_r_000000_0' done.
2013-10-31 15:24:51,871 INFO  flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [euclidean] starting step: (3/3) ...eplatform/euclidean/short
2013-10-31 15:24:51,874 INFO  mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(825)) - Default number of map tasks: 1
2013-10-31 15:24:51,875 INFO  mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(832)) - Default number of reduce tasks: 1
2013-10-31 15:24:51,888 INFO  mapred.JobClient (JobClient.java:copyAndConfigureFiles(794)) - Setting group to hadoop
2013-10-31 15:24:51,892 INFO  mapred.FileInputFormat (FileInputFormat.java:listStatus(199)) - Total input paths to process : 1
2013-10-31 15:24:51,910 INFO  flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [euclidean] submitted hadoop job: job_local_0012
2013-10-31 15:24:51,913 INFO  mapred.MapTask (MapTask.java:run(352)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:24:51,913 INFO  mapred.Task (Task.java:initialize(534)) -  Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@22c5967f
2013-10-31 15:24:51,914 INFO  io.MultiInputSplit (MultiInputSplit.java:readFields(161)) - current split input path: file:/tmp/hadoop-hadoop/6369659947_euclidean_D8605651C8F7468EABC0EE5A294460CC/part-00000
2013-10-31 15:24:51,916 INFO  mapred.MapTask (MapTask.java:runOldMapper(430)) - numReduceTasks: 1
2013-10-31 15:24:51,916 INFO  mapred.MapTask (MapTask.java:<init>(960)) - io.sort.mb = 100
2013-10-31 15:24:51,951 INFO  mapred.MapTask (MapTask.java:<init>(972)) - data buffer = 79691776/99614720
2013-10-31 15:24:51,951 INFO  mapred.MapTask (MapTask.java:<init>(973)) - record buffer = 262144/327680
2013-10-31 15:24:51,952 INFO  hadoop.FlowMapper (FlowMapper.java:configure(70)) - cascading version: 2.2.0
2013-10-31 15:24:51,952 INFO  hadoop.FlowMapper (FlowMapper.java:configure(71)) - child jvm opts: 
2013-10-31 15:24:51,958 INFO  hadoop.FlowMapper (FlowMapper.java:configure(86)) - sourcing from: TempHfs["SequenceFile[['movie', 'name1', 'rate1', 'name2', 'rate2', 'score']]"][6369659947/euclidean/]
2013-10-31 15:24:51,958 INFO  hadoop.FlowMapper (FlowMapper.java:configure(89)) - sinking to: GroupBy(euclidean)[by:[{2}:'name1', 'name2']]
2013-10-31 15:24:51,962 INFO  mapred.MapTask (MapTask.java:flush(1300)) - Starting flush of map output
2013-10-31 15:24:51,965 INFO  mapred.MapTask (MapTask.java:sortAndSpill(1484)) - Finished spill 0
2013-10-31 15:24:51,966 INFO  mapred.Task (Task.java:done(852)) - Task:attempt_local_0012_m_000000_0 is done. And is in the process of commiting
2013-10-31 15:24:54,914 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - file:/tmp/hadoop-hadoop/6369659947_euclidean_D8605651C8F7468EABC0EE5A294460CC/part-00000:0+7726
2013-10-31 15:24:54,915 INFO  mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0012_m_000000_0' done.
2013-10-31 15:24:54,920 INFO  mapred.ReduceTask (ReduceTask.java:run(360)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:24:54,921 INFO  mapred.Task (Task.java:initialize(534)) -  Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@65ae615a
2013-10-31 15:24:54,922 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - 
2013-10-31 15:24:54,923 INFO  mapred.Merger (Merger.java:merge(390)) - Merging 1 sorted segments
2013-10-31 15:24:54,924 INFO  mapred.Merger (Merger.java:merge(473)) - Down to the last merge-pass, with 1 segments left of total size: 7250 bytes
2013-10-31 15:24:54,924 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - 
2013-10-31 15:24:54,925 INFO  hadoop.FlowReducer (FlowReducer.java:configure(77)) - cascading version: 2.2.0
2013-10-31 15:24:54,926 INFO  hadoop.FlowReducer (FlowReducer.java:configure(78)) - child jvm opts: 
2013-10-31 15:24:54,938 INFO  hadoop.FlowReducer (FlowReducer.java:configure(96)) - sourcing from: GroupBy(euclidean)[by:[{2}:'name1', 'name2']]
2013-10-31 15:24:54,938 INFO  hadoop.FlowReducer (FlowReducer.java:configure(99)) - sinking to: Hfs["TextLine[['line']->[ALL]]"]["/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/test/output/hadoop/distanceusecaseplatform/euclidean/short"]
2013-10-31 15:24:54,945 INFO  mapred.Task (Task.java:done(852)) - Task:attempt_local_0012_r_000000_0 is done. And is in the process of commiting
2013-10-31 15:24:54,945 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - 
2013-10-31 15:24:54,945 INFO  mapred.Task (Task.java:commit(1005)) - Task attempt_local_0012_r_000000_0 is allowed to commit now
2013-10-31 15:24:54,947 INFO  mapred.FileOutputCommitter (FileOutputCommitter.java:commitTask(138)) - Saved output of task 'attempt_local_0012_r_000000_0' to file:/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/test/output/hadoop/distanceusecaseplatform/euclidean/short
2013-10-31 15:24:57,921 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - reduce > reduce
2013-10-31 15:24:57,922 INFO  mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0012_r_000000_0' done.
2013-10-31 15:24:57,931 INFO  util.Hadoop18TapUtil (Hadoop18TapUtil.java:cleanTempPath(219)) - deleting temp path /home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/test/output/hadoop/distanceusecaseplatform/euclidean/short/_temporary
2013-10-31 15:24:57,950 INFO  mapred.FileInputFormat (FileInputFormat.java:listStatus(199)) - Total input paths to process : 1
2013-10-31 15:24:57,961 INFO  mapred.FileInputFormat (FileInputFormat.java:listStatus(199)) - Total input paths to process : 1
2013-10-31 15:24:57,963 INFO  cascading.PlatformTestCase (PlatformTestCase.java:tearDown(209)) - copying to local /home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/test/output//hadoop/distanceusecaseplatform/euclidean/short