Ann*_*nna 2 r economics dplyr data-cleaning
我有一个包含工资记录的 df 。对于每个重复的 PersonID-CalendarYear 组合,我想保留 WagesQ1:WagesQ4 列中的最高值。我还希望这些值都位于同一行。
例如,PersonID #49 有 3 行代表 2017 年。我想保留每个工资列中的最高值,这样我最终会得到 1 行代表 PersonID #49 和 2017 年。
到目前为止,我尝试了以下操作,但这会将每个 PersonID-CalendarYear 组合重复 4 次,这根本不是我想要的。
test <- Emp %>%
group_by(PersonID, CalendarYear) %>%
slice(which.max(WagesQ1),
which.max(WagesQ2),
which.max(WagesQ3),
which.max(WagesQ4))
Run Code Online (Sandbox Code Playgroud)
我在这里分享 df 的前 50 行:
structure(list(PersonID = c("30", "30", "30", "30", "30", "30",
"30", "30", "30", "30", "30", "30", "30", "30", "30", "30", "30",
"30", "30", "30", "30", "30", "36", "49", "49", "49", "49", "49",
"49", "49", "49", "49", "49", "49", "49", "49", "49", "49", "49",
"49", "49", "49", "49", "49", "49", "49", "49", "49", "49", "49"
), CalendarYear = c(2003L, 2004L, 2005L, 2006L, 2006L, 2007L,
2008L, 2008L, 2009L, 2010L, 2011L, 2012L, 2013L, 2014L, 2015L,
2016L, 2017L, 2017L, 2018L, 2019L, 2020L, 2021L, 2018L, 2003L,
2003L, 2004L, 2005L, 2006L, 2007L, 2008L, 2009L, 2010L, 2011L,
2012L, 2013L, 2014L, 2014L, 2015L, 2015L, 2015L, 2016L, 2016L,
2016L, 2017L, 2017L, 2017L, 2018L, 2018L, 2019L, 2020L), County = c("181",
"181", "181", "53", "175", "175", "53", "175", "53", "53", "53",
"53", "53", "53", "53", "53", "53", "53", "53", "53", "53", "53",
"9", "169", "169", "169", "169", "169", "169", "169", "169",
"169", "169", "169", "169", "169", "169", "157", "169", "169",
"157", "169", "169", "157", "169", "169", "169", "169", "169",
"169"), industryID = c(844L, 844L, 844L, 418L, 829L, 829L, 418L,
829L, 418L, 418L, 418L, 418L, 418L, 418L, 418L, 418L, 411L, 418L,
411L, 411L, 411L, 411L, 10L, 410L, 1037L, 1037L, 1037L, 1037L,
1037L, 1037L, 1037L, 1037L, 1037L, 1037L, 1037L, 892L, 1037L,
892L, 892L, 1037L, 892L, 892L, 1037L, 892L, 892L, 1037L, 892L,
1037L, 1037L, 1037L), OwnershipCode_Qtr1 = c("50", "50", "50",
"50", NA, "50", NA, "50", "50", "50", "50", "50", "50", "50",
"50", "50", NA, "50", "50", "50", "50", "50", NA, "50", NA, "30",
"30", "30", "30", "30", "30", "30", "30", "30", "30", "50", "30",
NA, "50", "30", NA, "50", "30", NA, "50", "30", "50", "30", "30",
"30"), EmployeeQuarterlyHoursWorked_Qtr1 = c("560", "480", "520",
"520", NA, "520", NA, "520", "480", "432", "480", "480", "417",
"371", "404", "384", NA, "488", "479", "454", "476", "428", NA,
"520", NA, "369", "386", "449", "457", "416", "448", "384", "389",
"384", "388", "23", "386", NA, "1", "384", NA, "6", "384", NA,
"6", "560", "60", "560", "560", "560"), EmployeeQuarterlyWages_Qtr1 = c("25974.00",
"21961.00", "40279.00", "28924.00", "0", "47695.00", "0", "51586.00",
"53059.00", "34615.00", "51267.00", "65367.00", "65935.00", "66764.00",
"68044.00", "67817.00", "0", "68835.00", "70923.00", "67671.00",
"80230.00", "54924.00", "0", "6977.00", "0", "6106.00", "7207.00",
"8640.00", "8976.00", "8576.00", "9801.00", "8275.00", "8275.00",
"8275.00", "8474.00", "376.00", "8677.00", "0", "750.00", "8883.00",
"0", "500.00", "9356.00", "0", "441.00", "16433.00", "490.00",
"17652.00", "18791.00", "20521.00"), EmployeeTenure_Qtr1 = c("3",
"7", "11", "1", NA, "3", NA, "7", "4", "8", "12", "16", "20",
"24", "28", "32", NA, "36", "2", "6", "10", "14", NA, "16", NA,
"2", "6", "10", "14", "18", "22", "26", "30", "34", "38", "1",
"42", NA, "1", "46", NA, "2", "50", NA, "2", "54", "2", "58",
"62", "66"), EmployerLocationTotalWagesPaid_Qtr1 = c("2705239.00",
"2655546.00", "3060808.00", "56494722.00", NA, "11577972.00",
NA, "7905850.00", "67523518.00", "43414753.00", "59875257.00",
"70830126.00", "77209774.00", "57210242.00", "72471506.00", "78008581.00",
NA, "69864117.00", "12839820.00", "14455450.00", "16005038.00",
"11267450.00", NA, "7107073.00", NA, "1900914.00", "2031601.00",
"2495287.00", "2637867.00", "2678876.00", "2780822.00", "2219695.00",
"2268540.00", "2120709.00", "2231025.00", "672803.00", "2234705.00",
NA, "734496.00", "2228583.00", NA, "611993.00", "2408250.00",
NA, "646954.00", "2993537.00", "622919.00", "3193565.00", "3395497.00",
"3553823.00"), QuarterlyAverageEmployment_Qtr1 = c("97", "98",
"105", "2385", NA, "362", NA, "240", "1952", "1760", "1715",
"1710", "1617", "1640", "1606", "1595", NA, "1470", "389", "443",
"461", "402", NA, "762", NA, "260", "251", "254", "265", "269",
"259", "235", "241", "235", "233", "106", "227", NA, "97", "224",
NA, "97", "221", NA, "94", "238", "92", "236", "239", "241"),
OwnershipCode_Qtr2 = c("50", "50", "50", "50", NA, "50",
"50", NA, "50", "50", "50", "50", "50", "50", "50", "50",
NA, "50", "50", "50", "50", NA, NA, "50", NA, "30", "30",
"30", "30", "30", "30", "30", "30", "30", "30", NA, "30",
"30", NA, "30", "30", "50", "30", "30", "50", "30", "50",
"30", "30", "30"), EmployeeQuarterlyHoursWorked_Qtr2 = c("480",
"520", "520", "520", NA, "520", "520", NA, "448", "464",
"464", "508", "536", "489", "495", "492", NA, "422", "423",
"424", "384", NA, NA, "520", NA, "448", "450", "386", "386",
"255", "387", "387", "385", "449", "451", NA, "456", "1",
NA, "455", "3", "2", "457", "6", "5", "480", "20", "480",
"480", "480"), EmployeeQuarterlyWages_Qtr2 = c("21911.00",
"22970.00", "29353.00", "28490.00", "0", "29964.00", "34491.00",
"0", "32600.00", "32250.00", "37819.00", "41553.00", "42701.00",
"44442.00", "49143.00", "46457.00", "0", "41058.00", "42066.00",
"41826.00", "34726.00", "0", "0", "7182.00", "0", "7934.00",
"8408.00", "7426.00", "7726.00", "5293.00", "8275.00", "8275.00",
"8275.00", "9830.00", "9896.00", "0", "10147.00", "80.00",
"0", "10423.00", "80.00", "166.00", "11007.00", "50.00",
"367.00", "14214.00", "163.00", "15188.00", "16168.00", "18067.00"
), EmployeeTenure_Qtr2 = c("4", "8", "12", "2", NA, "4",
"1", NA, "5", "9", "13", "17", "21", "25", "29", "33", NA,
"37", "3", "7", "11", NA, NA, "17", NA, "3", "7", "11", "15",
"19", "23", "27", "31", "35", "39", NA, "43", "1", NA, "47",
"1", "3", "51", "1", "3", "55", "3", "59", "63", "67"), EmployerLocationTotalWagesPaid_Qtr2 = c("2273677.00",
"2360550.00", "2501736.00", "46874830.00", NA, "8604560.00",
"42606565.00", NA, "36796585.00", "36680417.00", "44117859.00",
"46342635.00", "49910970.00", "59437204.00", "53698147.00",
"54838150.00", NA, "39797895.00", "10375288.00", "11095997.00",
"10321467.00", NA, NA, "6344850.00", NA, "2282968.00", "2439921.00",
"2159125.00", "2375421.00", "2373332.00", "2368912.00", "2203507.00",
"2184714.00", "2570868.00", "2605639.00", NA, "2692559.00",
"968147.00", NA, "2782204.00", "966568.00", "638154.00",
"2802058.00", "1024873.00", "683685.00", "2645041.00", "660148.00",
"2811118.00", "2986976.00", "3153488.00"), QuarterlyAverageEmployment_Qtr2 = c("98",
"100", "103", "2379", NA, "358", "1935", NA, "1776", "1682",
"1739", "1703", "1660", "1658", "1586", "1606", NA, "1459",
"414", "438", "450", NA, NA, "715", NA, "276", "258", "283",
"300", "305", "285", "264", "261", "273", "275", NA, "273",
"103", NA, "268", "104", "88", "269", "115", "85", "274",
"80", "278", "268", "208"), OwnershipCode_Qtr3 = c("50",
"50", "50", NA, "50", "50", "50", NA, "50", "50", "50", "50",
"50", "50", "50", "50", NA, "50", "50", "50", "50", NA, "50",
"50", NA, "30", "30", "30", "30", "30", "30", "30", "30",
"30", "30", NA, "30", NA, NA, "30", NA, NA, "30", NA, NA,
"30", NA, "30", "30", "30"), EmployeeQuarterlyHoursWorked_Qtr3 = c("520",
"520", "520", NA, "520", "520", "520", NA, "488", "544",
"560", "399", "426", "396", "443", "415", NA, "489", "489",
"524", "488", NA, "839", "520", NA, "384", "449", "446",
"451", "418", "452", "450", "449", "390", "389", NA, "384",
NA, NA, "384", NA, NA, "545", NA, NA, "560", NA, "560", "560",
"560"), EmployeeQuarterlyWages_Qtr3 = c("25563.00", "26941.00",
"29161.00", "0", "33522.00", "37714.00", "37519.00", "0",
"33862.00", "37625.00", "42420.00", "37400.00", "36718.00",
"38339.00", "39777.00", "43092.00", "0", "48008.00", "49174.00",
"48985.00", "45297.00", "0", "12678.00", "3644.00", "0",
"7065.00", "8494.00", "8737.00", "9013.00", "8619.00", "9654.00",
"9654.00", "9654.00", "8394.00", "8521.00", "0", "8697.00",
"0", "0", "8912.00", "0", "0", "14892.00", "0", "0", "17151.00",
"0", "18305.00", "19518.00", "21078.00"), EmployeeTenure_Qtr3 = c("5",
"9", "13", NA, "1", "5", "2", NA, "6", "10", "14", "18",
"22", "26", "30", "34", NA, "38", "4", "8", "12", NA, "1",
"18", NA, "4", "8", "12", "16", "20", "24", "28", "32", "36",
"40", NA, "44", NA, NA, "48", NA, NA, "52", NA, NA, "56",
NA, "60", "64", "68"), EmployerLocationTotalWagesPaid_Qtr3 = c("2561942.00",
"2836345.00", "2901631.00", NA, "6006025.00", "9244633.00",
"45594699.00", NA, "40660417.00", "42996310.00", "46205456.00",
"41682396.00", "41350115.00", "43793052.00", "42007503.00",
"54221071.00", NA, "44139475.00", "12277604.00", "12920820.00",
"12261562.00", NA, "841949.00", "5433154.00", NA, "2077482.00",
"2588350.00", "2625862.00", "2810502.00", "2888481.00", "2855988.00",
"2737480.00", "2717807.00", "2322919.00", "2346778.00", NA,
"2349394.00", NA, NA, "2424185.00", NA, NA, "3049769.00",
NA, NA, "3227870.00", NA, "3388103.00", "3536607.00", "3555916.00"
), QuarterlyAverageEmployment_Qtr3 = c("102", "110", "104",
NA, "292", "355", "1962", NA, "1814", "1730", "1742", "1703",
"1645", "1663", "1624", "1622", NA, "1461", "419", "443",
"433", NA, "96", "665", NA, "330", "335", "339", "354", "358",
"339", "356", "321", "334", "334", NA, "326", NA, NA, "322",
NA, NA, "336", NA, NA, "332", NA, "342", "340", "231"), OwnershipCode_Qtr4 = c("50",
"50", "50", NA, "50", "50", "50", NA, "50", "50", "50", "50",
"50", "50", "50", "50", "50", NA, "50", "50", "50", NA, "50",
"50", "30", "30", "30", "30", "30", "30", "30", "30", "30",
"30", "30", NA, "30", NA, "50", "30", NA, "50", "30", NA,
"50", "30", "50", "30", "30", "30"), EmployeeQuarterlyHoursWorked_Qtr4 = c("520",
"520", "520", NA, "520", "520", "520", NA, "520", "536",
"480", "499", "429", "490", "506", "426", "430", NA, "375",
"416", "500", NA, "772", "520", "293", "458", "395", "400",
"394", "387", "457", "450", "449", "460", "458", NA, "451",
NA, "4", "454", NA, "6", "480", NA, "30", "480", "0", "480",
"480", "560"), EmployeeQuarterlyWages_Qtr4 = c("25588.00",
"28944.00", "31548.00", "0", "35997.00", "35253.00", "37625.00",
"0", "43087.00", "38574.00", "34017.00", "61003.00", "43721.00",
"44729.00", "46407.00", "39777.00", "41149.00", "0", "42559.00",
"47402.00", "49078.00", "0", "5737.00", "1634.00", "4758.00",
"8287.00", "7280.00", "7499.00", "7726.00", "7956.00", "9654.00",
"10013.00", "10013.00", "10160.00", "10314.00", "0", "10524.00",
"0", "333.00", "10784.00", "0", "441.00", "13338.00", "0",
"245.00", "14782.00", "145.00", "15774.00", "16900.00", "21078.00"
), EmployeeTenure_Qtr4 = c("6", "10", "14", NA, "2", "6",
"3", NA, "7", "11", "15", "19", "23", "27", "31", "35", "1",
NA, "5", "9", "13", NA, "2", "19", "1", "5", "9", "13", "17",
"21", "25", "29", "33", "37", "41", NA, "45", NA, "1", "49",
NA, "1", "53", NA, "1", "57", "1", "61", "65", "69"), EmployerLocationTotalWagesPaid_Qtr4 = c("2592689.00",
"2786088.00", "2805669.00", NA, "5740894.00", "6721161.00",
"46794858.00", NA, "42277545.00", "47369971.00", "45689406.00",
"52769353.00", "51278020.00", "50198997.00", "53838385.00",
"43169822.00", "8467677.00", NA, "10719468.00", "11308861.00",
"12149650.00", NA, "316437.00", "6584282.00", "2353924.00",
"2497289.00", "2250699.00", "2328109.00", "2456573.00", "2497301.00",
"2844437.00", "2796565.00", "2739956.00", "2725955.00", "2746522.00",
NA, "2805607.00", NA, "621933.00", "2831090.00", NA, "628393.00",
"2498316.00", NA, "647813.00", "2589627.00", "656515.00",
"2813896.00", "2945372.00", "2961453.00"), QuarterlyAverageEmployment_Qtr4 = c("100",
"106", "100", NA, "279", "300", "1944", NA, "1767", "1738",
"1743", "1704", "1646", "1579", "1595", "1538", "354", NA,
"423", "446", "418", NA, "25", "659", "247", "255", "263",
"259", "276", "277", "254", "291", "257", "244", "250", NA,
"233", NA, "97", "238", NA, "92", "243", NA, "91", "239",
"91", "237", "245", "220"), CountyName = c("Statewide (i.e. no fixed location)",
"Statewide (i.e. no fixed location)", "Statewide (i.e. no fixed location)",
"Hennepin County", "City of Minneapolis", "City of Minneapolis",
"Hennepin County", "City of Minneapolis", "Hennepin County",
"Hennepin County", "Hennepin County", "Hennepin County",
"Hennepin County", "Hennepin County", "Hennepin County",
"Hennepin County", "Hennepin County", "Hennepin County",
"Hennepin County", "Hennepin County", "Hennepin County",
"Hennepin County", "Benton County", "Winona County", "Winona County",
"Winona County", "Winona County", "Winona County", "Winona County",
"Winona County", "Winona County", "Winona County", "Winona County",
"Winona County", "Winona County", "Winona County", "Winona County",
"Wabasha County", "Winona County", "Winona County", "Wabasha County",
"Winona County", "Winona County", "Wabasha County", "Winona County",
"Winona County", "Winona County", "Winona County", "Winona County",
"Winona County"), mncounty = c(NA, NA, NA, "27", NA, NA,
"27", NA, "27", "27", "27", "27", "27", "27", "27", "27",
"27", "27", "27", "27", "27", "27", "5", "85", "85", "85",
"85", "85", "85", "85", "85", "85", "85", "85", "85", "85",
"85", "79", "85", "85", "79", "85", "85", "79", "85", "85",
"85", "85", "85", "85"), IndustryGroup = c("Other Professional, Scientific, and Technical Services",
"Other Professional, Scientific, and Technical Services",
"Other Professional, Scientific, and Technical Services",
"Navigational, Measuring, Electromedical, and Control Instruments Manufacturing",
"Scientific Research and Development Svc", "Scientific Research and Development Svc",
"Navigational, Measuring, Electromedical, and Control Instruments Manufacturing",
"Scientific Research and Development Svc", "Navigational, Measuring, Electromedical, and Control Instruments Manufacturing",
"Navigational, Measuring, Electromedical, and Control Instruments Manufacturing",
"Navigational, Measuring, Electromedical, and Control Instruments Manufacturing",
"Navigational, Measuring, Electromedical, and Control Instruments Manufacturing",
"Navigational, Measuring, Electromedical, and Control Instruments Manufacturing",
"Navigational, Measuring, Electromedical, and Control Instruments Manufacturing",
"Navigational, Measuring, Electromedical, and Control Instruments Manufacturing",
"Navigational, Measuring, Electromedical, and Control Instruments Manufacturing",
"Semiconductor and Other Electronic Component Manufacturing",
"Navigational, Measuring, Electromedical, and Control Instruments Manufacturing",
"Semiconductor and Other Electronic Component Manufacturing",
"Semiconductor and Other Electronic Component Manufacturing",
"Semiconductor and Other Electronic Component Manufacturing",
"Semiconductor and Other Electronic Component Manufacturing",
"Vegetable and Melon Farming", "Semiconductor and Other Electronic Component Manufacturing",
"Executive, Legislative, and Other General Government Support ",
"Executive, Legislative, and Other General Government Support ",
"Executive, Legislative, and Other General Government Support ",
"Executive, Legislative, and Other General Government Support ",
"Executive, Legislative, and Other General Government Support ",
"Executive, Legislative, and Other General Government Support ",
"Executive, Legislative, and Other General Government Support ",
"Executive, Legislative, and Other General Government Support ",
"Executive, Legislative, and Other General Government Support ",
"Executive, Legislative, and Other General Government Support ",
"Executive, Legislative, and Other General Government Support ",
"Elementary and Secondary Schools", "Executive, Legislative, and Other General Government Support ",
"Elementary and Secondary Schools", "Elementary and Secondary Schools",
"Executive, Legislative, and Other General Government Support ",
"Elementary and Secondary Schools", "Elementary and Secondary Schools",
"Executive, Legislative, and Other General Government Support ",
"Elementary and Secondary Schools", "Elementary and Secondary Schools",
"Executive, Legislative, and Other General Government Support ",
"Elementary and Secondary Schools", "Executive, Legislative, and Other General Government Support ",
"Executive, Legislative, and Other General Government Support ",
"Executive, Legislative, and Other General Government Support "
), industry = c("All Other Professional, Scientific, and Technical Services",
"All Other Professional, Scientific, and Technical Services",
"All Other Professional, Scientific, and Technical Services",
"Navigational, Measuring, Electromedical, and Control Instruments Manufacturing",
"Physical/Engineering/Biological Research", "Physical/Engineering/Biological Research",
"Navigational, Measuring, Electromedical, and Control Instruments Manufacturing",
"Physical/Engineering/Biological Research", "Navigational, Measuring, Electromedical, and Control Instruments Manufacturing",
"Navigational, Measuring, Electromedical, and Control Instruments Manufacturing",
"Navigational, Measuring, Electromedical, and Control Instruments Manufacturing",
"Navigational, Measuring, Electromedical, and Control Instruments Manufacturing",
"Navigational, Measuring, Electromedical, and Control Instruments Manufacturing",
"Navigational, Measuring, Electromedical, and Control Instruments Manufacturing",
"Navigational, Measuring, Electromedical, and Control Instruments Manufacturing",
"Navigational, Measuring, Electromedical, and Control Instruments Manufacturing",
"Semiconductor and Other Electronic Component Manufacturing",
"Navigational, Measuring, Electromedical, and Control Instruments Manufacturing",
"Semiconductor and Other Electronic Component Manufacturing",
"Semiconductor and Other Electronic Component Manufacturing",
"Semiconductor and Other Electronic Component Manufacturing",
"Semiconductor and Other Electronic Component Manufacturing",
"Vegetable and Melon Farming", "Semiconductor and Other Electronic Component Manufacturing",
"Executive Offices ", "Executive Offices ", "Executive Offices ",
"Executive Offices ", "Executive Offices ", "Executive Offices ",
"Executive Offices ", "Executive Offices ", "Executive Offices ",
"Executive Offices ", "Executive Offices ", "Elementary and Secondary Schools ",
"Executive Offices ", "Elementary and Secondary Schools ",
"Elementary and Secondary Schools ", "Executive Offices ",
"Elementary and Secondary Schools ", "Elementary and Secondary Schools ",
"Executive Offices ", "Elementary and Secondary Schools ",
"Elementary and Seco
您可以reframe()在此处使用across():
reframe(\n ungroup(df),\n across(contains("QuarterlyWages"), ~max(as.numeric(.x))),\n .by=PersonID:CalendarYear\n)\nRun Code Online (Sandbox Code Playgroud)\n输出:
\n# A tibble: 38 \xc3\x97 6\n PersonID CalendarYear EmployeeQuarterlyWages_Qtr1 EmployeeQuarterlyWages_Qtr2 EmployeeQuarterlyWages_\xe2\x80\xa6\xc2\xb9\n <chr> <int> <dbl> <dbl> <dbl>\n 1 30 2003 25974 21911 25563\n 2 30 2004 21961 22970 26941\n 3 30 2005 40279 29353 29161\n 4 30 2006 28924 28490 33522\n 5 30 2007 47695 29964 37714\n 6 30 2008 51586 34491 37519\n 7 30 2009 53059 32600 33862\n 8 30 2010 34615 32250 37625\n 9 30 2011 51267 37819 42420\n10 30 2012 65367 41553 37400\n# \xe2\x84\xb9 28 more rows\n# \xe2\x84\xb9 abbreviated name: \xc2\xb9\xe2\x80\x8bEmployeeQuarterlyWages_Qtr3\n# \xe2\x84\xb9 1 more variable: EmployeeQuarterlyWages_Qtr4 <dbl>\n# \xe2\x84\xb9 Use `print(n = ...)` to see more rows\nRun Code Online (Sandbox Code Playgroud)\n