dataframe - r data.frame create new variable -


i have dataframe around 1.5 million rows , 5 cols. 1 variable (variable) of type nationality_year (e.g. spain_1998) , want split in 2 columns, 1 containing nationality, left side of name before underscore, , 1 containing year, right side of underscore. have tried concat.split should easiest way:

aa <- concat.split(mydata, "variable", sep = "_", drop = f) 

but after 2 hours running did not produce output. not sure if should leave running longer period of time or if there non time consuming way this.

any on issue appreciated!

here reproducible (subset!) sample:

mydata<-  structure(list(province = c(1l, 4l, 7l, 8l, 11l, 14l, 17l, 20l,  24l, 28l, 30l, 33l, 36l, 41l, 44l, 46l, 48l, 3l, 6l, 8l, 10l,  13l, 15l, 18l, 23l, 26l, 29l, 31l, 35l, 38l, 41l, 46l, 47l, 2l,  4l, 8l, 8l, 11l, 15l, 17l, 21l, 24l, 28l, 30l, 33l, 37l, 41l,  45l, 46l, 49l, 3l, 6l, 8l, 10l, 13l, 15l, 19l, 23l, 27l, 29l,  32l, 36l, 39l, 43l, 46l, 48l, 2l, 5l, 8l, 8l, 12l, 15l, 18l,  21l, 24l, 28l, 30l, 33l, 37l, 41l, 45l, 46l, 50l, 3l, 7l, 8l,  10l, 14l, 16l, 20l, 23l, 27l, 29l, 32l, 36l, 39l, 43l, 46l, 48l,  3l, 6l, 8l, 8l, 12l, 15l, 18l, 21l, 25l, 28l, 31l, 34l, 38l,  41l, 45l, 46l, 50l, 3l, 7l, 8l, 11l, 14l, 17l, 20l, 23l, 27l,  29l, 33l, 36l, 40l, 43l, 46l, 48l, 3l, 6l, 8l, 9l, 12l, 15l,  18l, 22l, 25l, 28l, 31l, 35l, 38l, 41l, 45l, 46l, 50l, 4l, 7l,  8l, 11l, 14l, 17l, 20l, 24l, 28l, 30l, 33l, 36l, 41l, 43l, 46l,  48l, 3l, 6l, 8l, 10l, 13l, 15l, 18l, 22l, 26l, 28l, 31l, 35l,  38l, 41l, 46l, 47l, 1l, 4l, 8l, 8l, 11l, 14l, 17l, 20l, 24l,  28l, 30l, 33l, 36l, 41l, 44l, 46l, 49l, 3l, 6l), age5 = structure(c(1l,  5l, 9l, 7l, 6l, 7l, 5l, 8l, 3l, 3l, 3l, 5l, 8l, 2l, 3l, 6l, 9l,  5l, 7l, 4l, 3l, 5l, 8l, 8l, 2l, 8l, 2l, 9l, 7l, 9l, 9l, 2l, 7l,  2l, 9l, 1l, 8l, 8l, 1l, 8l, 1l, 6l, 4l, 6l, 7l, 2l, 3l, 1l, 7l,  5l, 6l, 9l, 5l, 6l, 8l, 9l, 3l, 4l, 3l, 4l, 4l, 1l, 3l, 1l, 2l,  2l, 6l, 6l, 2l, 9l, 2l, 2l, 1l, 5l, 9l, 5l, 8l, 9l, 7l, 4l, 3l,  7l, 2l, 8l, 2l, 6l, 9l, 1l, 5l, 1l, 6l, 6l, 6l, 7l, 3l, 6l, 3l,  3l, 4l, 1l, 1l, 2l, 9l, 6l, 4l, 3l, 8l, 3l, 7l, 1l, 5l, 2l, 6l,  6l, 8l, 5l, 9l, 5l, 6l, 2l, 3l, 1l, 4l, 8l, 9l, 8l, 1l, 5l, 1l,  6l, 4l, 6l, 2l, 3l, 3l, 5l, 9l, 5l, 5l, 4l, 7l, 8l, 4l, 2l, 5l,  7l, 8l, 9l, 8l, 3l, 7l, 7l, 5l, 6l, 3l, 6l, 1l, 2l, 2l, 3l, 7l,  1l, 9l, 5l, 8l, 4l, 5l, 4l, 1l, 3l, 7l, 7l, 9l, 3l, 9l, 7l, 5l,  7l, 8l, 1l, 4l, 4l, 6l, 1l, 8l, 7l, 8l, 6l, 8l, 4l, 3l, 4l, 5l,  9l, 2l, 6l, 6l, 1l, 5l, 7l), .label = c("10-14", "15-19", "20-24",  "25-29", "30-34", "35-39", "40-44", "45-49", "50-54"), class = "factor"),  zona91ok = c(101l, 4079l, 712l, 8205l, 11022l, 14021l, 1714l,  20067l, 2414l, 2810l, 300799l, 3305l, 36026l, 41024l, 4405l,  4607l, 48015l, 308l, 610l, 8121l, 1006l, 1307l, 1511l, 1813l,  2308l, 2605l, 2910l, 310799l, 35026l, 3811l, 411199l, 4601l,  4708l, 202l, 405l, 8015l, 837l, 11033l, 1502l, 1702l, 2112l,  2408l, 28047l, 30015l, 3305l, 3709l, 410199l, 4511l, 1202l,  490699l, 3063l, 610l, 827l, 1006l, 1301l, 15036l, 1901l,  2310l, 2709l, 29025l, 3201l, 36008l, 390899l, 4301l, 46184l,  4805l, 206l, 504l, 817l, 813l, 12135l, 1519l, 1810l, 2104l,  2402l, 28130l, 30030l, 3305l, 3707l, 411399l, 45165l, 46181l,  5008l, 305l, 7026l, 803l, 1006l, 1413l, 16078l, 200999l,  2312l, 2712l, 29069l, 3210l, 3616l, 391199l, 4313l, 46105l,  4805l, 310l, 6153l, 8252l, 8205l, 1205l, 1505l, 1808l, 2110l,  2508l, 2810l, 311399l, 3405l, 3807l, 41024l, 4507l, 46102l,  500599l, 3014l, 706l, 8121l, 11028l, 14042l, 1712l, 20045l,  2314l, 27031l, 29901l, 33024l, 3614l, 400199l, 4307l, 46021l,  4805l, 3066l, 6153l, 8015l, 901l, 12040l, 1522l, 1806l, 2203l,  2508l, 28047l, 311099l, 35004l, 3801l, 410199l, 4515l, 46017l,  501199l, 407l, 7027l, 827l, 1102l, 1404l, 17155l, 200599l,  24089l, 2812l, 30019l, 33024l, 3612l, 41038l, 4301l, 4628l,  4805l, 307l, 6153l, 817l, 1004l, 1309l, 1508l, 1804l, 2206l,  2606l, 28130l, 310799l, 35011l, 38022l, 411399l, 4622l, 4701l,  1036l, 4079l, 807l, 803l, 1108l, 1410l, 1708l, 201399l, 2410l,  28058l, 30043l, 33024l, 3610l, 410399l, 4401l, 4621l, 490499l,  3059l, 6153l), variable = structure(c(1l, 1l, 1l, 1l, 1l,  1l, 1l, 1l, 1l, 1l, 1l, 1l, 1l, 1l, 1l, 1l, 1l, 2l, 2l, 2l,  2l, 2l, 2l, 2l, 2l, 2l, 2l, 2l, 2l, 2l, 2l, 2l, 2l, 3l, 3l,  3l, 3l, 3l, 3l, 3l, 3l, 3l, 3l, 3l, 3l, 3l, 3l, 3l, 3l, 3l,  4l, 4l, 4l, 4l, 4l, 4l, 4l, 4l, 4l, 4l, 4l, 4l, 4l, 4l, 4l,  4l, 5l, 5l, 5l, 5l, 5l, 5l, 5l, 5l, 5l, 5l, 5l, 5l, 5l, 5l,  5l, 5l, 5l, 6l, 6l, 6l, 6l, 6l, 6l, 6l, 6l, 6l, 6l, 6l, 6l,  6l, 6l, 6l, 6l, 7l, 7l, 7l, 7l, 7l, 7l, 7l, 7l, 7l, 7l, 7l,  7l, 7l, 7l, 7l, 7l, 7l, 8l, 8l, 8l, 8l, 8l, 8l, 8l, 8l, 8l,  8l, 8l, 8l, 8l, 8l, 8l, 8l, 9l, 9l, 9l, 9l, 9l, 9l, 9l, 9l,  9l, 9l, 9l, 9l, 9l, 9l, 9l, 9l, 9l, 10l, 10l, 10l, 10l, 10l,  10l, 10l, 10l, 10l, 10l, 10l, 10l, 10l, 10l, 10l, 10l, 11l,  11l, 11l, 11l, 11l, 11l, 11l, 11l, 11l, 11l, 11l, 11l, 11l,  11l, 11l, 11l, 12l, 12l, 12l, 12l, 12l, 12l, 12l, 12l, 12l,  12l, 12l, 12l, 12l, 12l, 12l, 12l, 12l, 13l, 13l), .label = c("spain_1998",  "eu15dc_1998", "roe_1998", "magreb_1998", "ssa_1998", "la_1998",  "asia_1998", "row_1998", "total_1998", "spain_1999", "eu15dc_1999",  "roe_1999", "magreb_1999", "ssa_1999", "la_1999", "asia_1999",  "row_1999", "total_1999", "spain_2000", "eu15dc_2000", "roe_2000",  "magreb_2000", "ssa_2000", "la_2000", "asia_2000", "row_2000",  "total_2000", "spain_2001", "eu15dc_2001", "roe_2001", "magreb_2001",  "ssa_2001", "la_2001", "asia_2001", "row_2001", "total_2001",  "spain_2002", "eu15dc_2002", "roe_2002", "magreb_2002", "ssa_2002",  "la_2002", "asia_2002", "row_2002", "total_2002", "spain_2003",  "eu15dc_2003", "roe_2003", "magreb_2003", "ssa_2003", "la_2003",  "asia_2003", "row_2003", "total_2003", "spain_2004", "eu15dc_2004",  "roe_2004", "magreb_2004", "ssa_2004", "la_2004", "asia_2004",  "row_2004", "total_2004", "spain_2005", "eu15dc_2005", "roe_2005",  "magreb_2005", "ssa_2005", "la_2005", "asia_2005", "row_2005",  "total_2005", "spain_2006", "eu15dc_2006", "roe_2006", "magreb_2006",  "ssa_2006", "la_2006", "asia_2006", "row_2006", "total_2006",  "spain_2007", "eu15dc_2007", "roe_2007", "magreb_2007", "ssa_2007",  "la_2007", "asia_2007", "row_2007", "total_2007", "spain_2008",  "eu15dc_2008", "roe_2008", "magreb_2008", "ssa_2008", "la_2008",  "asia_2008", "row_2008", "total_2008", "spain_2009", "eu15dc_2009",  "roe_2009", "magreb_2009", "ssa_2009", "la_2009", "asia_2009",  "row_2009", "total_2009", "spain_2010", "eu15dc_2010", "roe_2010",  "magreb_2010", "ssa_2010", "la_2010", "asia_2010", "row_2010",  "total_2010", "spain_2011", "eu15dc_2011", "roe_2011", "magreb_2011",  "ssa_2011", "la_2011", "asia_2011", "row_2011", "total_2011",  "spain_2012", "eu15dc_2012", "roe_2012", "magreb_2012", "ssa_2012",  "la_2012", "asia_2012", "row_2012", "total_2012", "notspain_1998",  "notspain_1999", "notspain_2000", "notspain_2001", "notspain_2002",  "notspain_2003", "notspain_2004", "notspain_2005", "notspain_2006",  "notspain_2007", "notspain_2008", "notspain_2009", "notspain_2010",  "notspain_2011", "notspain_2012", "africa_1998", "africa_1999",  "africa_2000", "africa_2001", "africa_2002", "africa_2003",  "africa_2004", "africa_2005", "africa_2006", "africa_2007",  "africa_2008", "africa_2009", "africa_2010", "africa_2011",  "africa_2012", "dwc_1998", "dwc_1999", "dwc_2000", "dwc_2001",  "dwc_2002", "dwc_2003", "dwc_2004", "dwc_2005", "dwc_2006",  "dwc_2007", "dwc_2008", "dwc_2009", "dwc_2010", "dwc_2011",  "dwc_2012"), class = "factor"), frequency = c(614, 1943,  59, 201, 188, 10859, 93,  1494, 60, 1001, 1000, 689, 675, 934, 51,  1240, 165, 13, 0, 14, 2, 2,  2, 0, 3, 0, 40, 1, 18, 41, 1, 0, 3, 0, 0, 0, 1, 0,  0, 0, 0, 0, 7, 1, 0, 0, 0, 0, 0, 0, 0, 0, 80, 0,  0, 0, 4, 0, 0, 15, 0, 0, 1, 1, 3, 4, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 2, 0, 1, 0, 0, 2, 11, 0, 0, 0, 3, 2, 1, 5,  64, 1, 4, 1, 3, 4, 8, 1, 1, 1, 1, 0, 0, 0,  0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 2173, 907, 9059, 839,  4303, 100, 1727, 663, 694, 1210, 623,  1261, 772, 697, 490, 1031, 490, 956, 704,  1293, 1011, 739, 927, 755, 3340, 1190, 1254, 12880, 528,  3244, 277, 892, 837, 1, 2, 10, 1, 1, 2, 2, 0, 0, 1, 8, 3,  12, 0, 2, 1, 0, 4, 0, 0, 0, 0, 0, 0, 1, 12, 0, 7, 0, 0, 0,  0, 0, 5, 2)), .names = c("province", "age5", "zona91ok",  "variable", "frequency"), row.names = c(1l, 501l, 1001l, 1501l,  2001l, 2501l, 3001l, 3501l, 4001l, 4501l, 5001l, 5501l, 6001l,  6501l, 7001l, 7501l, 8001l, 8501l, 9001l, 9501l, 10001l, 10501l,  11001l, 11501l, 12001l, 12501l, 13001l, 13501l, 14001l, 14501l,  15001l, 15501l, 16001l, 16501l, 17001l, 17501l, 18001l, 18501l,  19001l, 19501l, 20001l, 20501l, 21001l, 21501l, 22001l, 22501l,  23001l, 23501l, 24001l, 24501l, 25001l, 25501l, 26001l, 26501l,  27001l, 27501l, 28001l, 28501l, 29001l, 29501l, 30001l, 30501l,  31001l, 31501l, 32001l, 32501l, 33001l, 33501l, 34001l, 34501l,  35001l, 35501l, 36001l, 36501l, 37001l, 37501l, 38001l, 38501l,  39001l, 39501l, 40001l, 40501l, 41001l, 41501l, 42001l, 42501l,  43001l, 43501l, 44001l, 44501l, 45001l, 45501l, 46001l, 46501l,  47001l, 47501l, 48001l, 48501l, 49001l, 49501l, 50001l, 50501l,  51001l, 51501l, 52001l, 52501l, 53001l, 53501l, 54001l, 54501l,  55001l, 55501l, 56001l, 56501l, 57001l, 57501l, 58001l, 58501l,  59001l, 59501l, 60001l, 60501l, 61001l, 61501l, 62001l, 62501l,  63001l, 63501l, 64001l, 64501l, 65001l, 65501l, 66001l, 66501l,  67001l, 67501l, 68001l, 68501l, 69001l, 69501l, 70001l, 70501l,  71001l, 71501l, 72001l, 72501l, 73001l, 73501l, 74001l, 74501l,  75001l, 75501l, 76001l, 76501l, 77001l, 77501l, 78001l, 78501l,  79001l, 79501l, 80001l, 80501l, 81001l, 81501l, 82001l, 82501l,  83001l, 83501l, 84001l, 84501l, 85001l, 85501l, 86001l, 86501l,  87001l, 87501l, 88001l, 88501l, 89001l, 89501l, 90001l, 90501l,  91001l, 91501l, 92001l, 92501l, 93001l, 93501l, 94001l, 94501l,  95001l, 95501l, 96001l, 96501l, 97001l, 97501l, 98001l, 98501l,  99001l, 99501l), class = "data.frame") 

try instead:

library(data.table) dt = data.table(mydata)  dt[, `:=`(nationality = sub('(.*)_(.*)', '\\1', variable),           year        = sub('(.*)_(.*)', '\\2', variable))] 

Comments

Popular posts from this blog

c++ - CryptStringToBinary API behavior -

c++ - Correct method for redrawing a layered window -

java.util.scanner - How to read and add only numbers to array from a text file -