R

Installing R

Download R from CRAN

R can be downloaded from its official website CRAN (The Comprehensive R Archive Network).

CRAN

Install R using conda

It is convenient to use conda to manage your R environment.

conda install -c conda-forge r-base=4.x.x

IDE for R: Posit(Rstudio)

Posit(Rstudio) is one of the most commonly used Integrated development environment(IDE) for R.

https://posit.co/

Use R in interactive mode

Run R script

Rscript mycode.R

Installing and Using R packages

install.packages("package_name")

library(package_name)

Basic syntax

Assignment and Evaluation

> x <- 1

> x
[1] 1

> print(x)
[1] 1

Data types

Atomic data types

logical, integer, real, complex, string (or character)

Atomic data types	Description	Examples
logical	boolean	`TRUE`, `FALSE`
integer	integer	`1`,`2`
numeric	float number	`0.01`
complex	complex number	`1+0i`
string	string or chracter	`abc`

Vectors

myvector <- c(1,2,3)
myvector < 1:3

myvector <- c(TRUE,FALSE)
myvector <- c(0.01, 0.02)
myvector <- c(1+0i, 2+3i)
myvector <- c("a","bc")

Matrices

> mymatrix <- matrix(1:6, nrow = 2, ncol = 3)
> mymatrix
     [,1] [,2] [,3]
[1,]    1    3    5
[2,]    2    4    6

> ncol(mymatrix)
[1] 3
> nrow(mymatrix)
[1] 2
> dim(mymatrix)
[1] 2 3
> length(mymatrix)
[1] 6

List

list() is a special vector-like data type that can contain different data types.

> mylist <- list(1, 0.02, "a", FALSE, c(1,2,3), matrix(1:6,nrow=2,ncol=3))
> mylist
[[1]]
[1] 1

[[2]]
[1] 0.02

[[3]]
[1] "a"

[[4]]
[1] FALSE

[[5]]
[1] 1 2 3

[[6]]
     [,1] [,2] [,3]
[1,]    1    3    5
[2,]    2    4    6

Dataframe

> df <- data.frame(score = c(90,80,70,60),  rank = c("a", "b", "c", "d"))
> df
  score rank
1    90    a
2    80    b
3    70    c
4    60    d

Subsetting

myvector
[1] 1 2 3
> myvector[0]
integer(0)
> myvector[1]
[1] 1
myvector[1:2]
[1] 1 2
> myvector[-1]
[1] 2 3
> myvector[-1:-2]
[1] 3

> mymatrix
     [,1] [,2] [,3]
[1,]    1    3    5
[2,]    2    4    6
> mymatrix[0]
integer(0)
> mymatrix[1]
[1] 1
> mymatrix[1,]
[1] 1 3 5
> mymatrix[1,2]
[1] 3
> mymatrix[1:2,2]
[1] 3 4
> mymatrix[,2]
[1] 3 4

> df
  score rank
1    90    a
2    80    b
3    70    c
4    60    d
> df[score]
Error in `[.data.frame`(df, score) : object 'score' not found
> df[[score]]
Error in (function(x, i, exact) if (is.matrix(i)) as.matrix(x)[[i]] else .subset2(x,  :
  object 'score' not found
> df[["score"]]
[1] 90 80 70 60
> df["score"]
  score
1    90
2    80
3    70
4    60
> df[1, "score"]
[1] 90
> df[1:2, "score"]
[1] 90 80
> df[1:2,2]
[1] "a" "b"
> df[1:2,1]
[1] 90 80
> df[,c("rank","score")]
  rank score
1    a    90
2    b    80
3    c    70
4    d    60

Data Input and Output

mydata <- read.table("data.txt", header=T)

write.table(mydata, "data.txt")

Control flow

if

if (x > y){
  print ("x")
} else if (x < y){
  print ("y")
} else {
  print("tie")
}

for

> for (x in 1:5) {
    print(x)
}

[1] 1
[1] 2
[1] 3
[1] 4
[1] 5

while

x<-0
while (x<5)
{
    x<-x+1
    print("Hello world")
}

[1] "Hello world"
[1] "Hello world"
[1] "Hello world"
[1] "Hello world"
[1] "Hello world"

Functions

myfunction <- function(x){
  // actual code here
  return(result)
}

> my_add_function <- function(x,y){
  c = x + y
  return(c)
}
> my_add_function(1,3)
[1] 4

Statistical functions

Normal distribution

Function	Description
dnorm(x, mean = 0, sd = 1, log = FALSE)	probability density function
pnorm(q, mean = 0, sd = 1, lower.tail = TRUE, log.p = FALSE)	cumulative density function
qnorm(p, mean = 0, sd = 1, lower.tail = TRUE, log.p = FALSE)	quantile function
rnorm(n, mean = 0, sd = 1)	generate random values from normal distribution

> dnorm(1.96)
[1] 0.05844094

> pnorm(1.96)
[1] 0.9750021

> pnorm(1.96, lower.tail=FALSE)
[1] 0.0249979

> qnorm(0.975)
[1] 1.959964

> rnorm(10)
 [1] -0.05595019  0.83176199  0.58362601 -0.89434812  0.85722843  0.96199308
 [7]  0.47782706 -0.46322066  0.03525421 -1.00715141

Chi-square distribution

Function	Description
dchisq(x, df, ncp = 0, log = FALSE)	probability density function
pchisq(q, df, ncp = 0, lower.tail = TRUE, log.p = FALSE)	cumulative density function
qchisq(p, df, ncp = 0, lower.tail = TRUE, log.p = FALSE)	quantile function
rchisq(n, df, ncp = 0)	generate random values from normal distribution

Regression

lm(formula, data, subset, weights, na.action,
   method = "qr", model = TRUE, x = FALSE, y = FALSE, qr = TRUE,
   singular.ok = TRUE, contrasts = NULL, offset, …)

# linear regression
results <- lm(formula = y ~ x1 + x2)

# logistic regression
results <- lm(formula = y ~ x1 + x2, family = "binomial")

Reference: - https://stat.ethz.ch/R-manual/R-devel/library/stats/html/lm.html