Simplify Your Data Cleaning: Replace Text in R

Simplify Your Data Cleaning: Replace Text in R

# Install any missing package, then attach all three.
# Each package is checked under its own name, so a missing readr is
# detected by looking for readr itself. requireNamespace() only tests
# availability; attaching is left to the library() calls below, which
# fail loudly if a package still cannot be loaded.
for (pkg in c("readr", "dplyr", "tidyr")) {
  if (!requireNamespace(pkg, quietly = TRUE)) install.packages(pkg)
}
library(readr)
library(dplyr)
library(tidyr)

# Address of the raw CSV file holding the practice yield data.
github <- "https://raw.githubusercontent.com/agronomy4future/raw_data_practice/main/yield_per_location.csv"

# Open the URL, parse it as CSV (column-type message suppressed), and
# store the result as a base data frame.
df <- github %>%
  url() %>%
  read_csv(show_col_types = FALSE) %>%
  data.frame()

# Reshape from wide to long: the twelve Location1..Location12 columns are
# stacked into a "Location" column (the former column name) and a "Yield"
# column (the cell value). All other columns are carried along unchanged.
#
# No group_by() is applied: pivot_longer() reshapes the same way with or
# without grouping, and data.frame() would discard the grouping anyway.
# all_of() keeps the selection strict: it errors if any listed column is
# missing, just like naming the columns one by one.
df.transpose <- df %>%
  pivot_longer(
    cols = all_of(paste0("Location", 1:12)),
    names_to = "Location",
    values_to = "Yield"
  ) %>%
  data.frame()

print(head(df.transpose, 12))
   Genotype Nitrogen Block   Location Yield
1       CV1       N0     I  Location1  98.0
2       CV1       N0     I  Location2  96.5
3       CV1       N0     I  Location3 115.8
4       CV1       N0     I  Location4  94.1
5       CV1       N0     I  Location5  82.8
6       CV1       N0     I  Location6 115.8
7       CV1       N0     I  Location7 110.0
8       CV1       N0     I  Location8  97.9
9       CV1       N0     I  Location9 107.6
10      CV1       N0     I Location10 128.6
11      CV1       N0     I Location11  74.3
12      CV1       N0     I Location12 121.3
.
.
.

1) to replace text with an empty string

# Build dataA from the long-format table:
#   Yield - coerced to numeric
#   Site  - the Location label with every "Location" substring replaced by
#           an empty string, then converted to a number
#           (e.g. "Location10" -> 10)
dataA <- mutate(
  df.transpose,
  Yield = as.numeric(Yield),
  Site = as.numeric(gsub("Location", "", Location))
)

print(head(dataA, 12))
   Genotype Nitrogen Block   Location Yield Site
1       CV1       N0     I  Location1  98.0    1
2       CV1       N0     I  Location2  96.5    2
3       CV1       N0     I  Location3 115.8    3
4       CV1       N0     I  Location4  94.1    4
5       CV1       N0     I  Location5  82.8    5
6       CV1       N0     I  Location6 115.8    6
7       CV1       N0     I  Location7 110.0    7
8       CV1       N0     I  Location8  97.9    8
9       CV1       N0     I  Location9 107.6    9
10      CV1       N0     I Location10 128.6   10
11      CV1       N0     I Location11  74.3   11
12      CV1       N0     I Location12 121.3   12
.
.
.

2) to replace text with other text

# Add a SiteInfo column: a copy of Location in which every "Location"
# substring is swapped for "Site" (e.g. "Location3" -> "Site3").
df.transpose[["SiteInfo"]] <- gsub(
  "Location", "Site", df.transpose[["Location"]]
)

print(head(df.transpose, 12))
   Genotype Nitrogen Block   Location Yield SiteInfo
1       CV1       N0     I  Location1  98.0    Site1
2       CV1       N0     I  Location2  96.5    Site2
3       CV1       N0     I  Location3 115.8    Site3
4       CV1       N0     I  Location4  94.1    Site4
5       CV1       N0     I  Location5  82.8    Site5
6       CV1       N0     I  Location6 115.8    Site6
7       CV1       N0     I  Location7 110.0    Site7
8       CV1       N0     I  Location8  97.9    Site8
9       CV1       N0     I  Location9 107.6    Site9
10      CV1       N0     I Location10 128.6   Site10
11      CV1       N0     I Location11  74.3   Site11
12      CV1       N0     I Location12 121.3   Site12
.
.
.

Comments are closed.